@jocmp/mercury-parser 3.0.7 → 3.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/generate-custom-parser.js +804 -621
- package/dist/generate-custom-parser.js.map +1 -1
- package/dist/mercury.js +616 -371
- package/dist/mercury.js.map +1 -1
- package/dist/mercury.web.js +2 -2
- package/dist/mercury.web.js.map +1 -1
- package/package.json +12 -16
|
@@ -23,7 +23,6 @@ var _Array$from = require('@babel/runtime-corejs2/core-js/array/from');
|
|
|
23
23
|
var _Symbol = require('@babel/runtime-corejs2/core-js/symbol');
|
|
24
24
|
var _Symbol$iterator = require('@babel/runtime-corejs2/core-js/symbol/iterator');
|
|
25
25
|
var _Array$isArray = require('@babel/runtime-corejs2/core-js/array/is-array');
|
|
26
|
-
var _typeof = require('@babel/runtime-corejs2/helpers/typeof');
|
|
27
26
|
var _Object$create = require('@babel/runtime-corejs2/core-js/object/create');
|
|
28
27
|
var _Object$freeze = require('@babel/runtime-corejs2/core-js/object/freeze');
|
|
29
28
|
var require$$7 = require('@babel/runtime-corejs2/helpers/objectWithoutProperties');
|
|
@@ -42,55 +41,8 @@ var require$$31 = require('dayjs/plugin/timezone');
|
|
|
42
41
|
var require$$32 = require('dayjs/plugin/customParseFormat');
|
|
43
42
|
var require$$33 = require('wuzzy');
|
|
44
43
|
var require$$34 = require('difflib');
|
|
45
|
-
var require$$35 = require('ellipsize');
|
|
46
44
|
var _taggedTemplateLiteral = require('@babel/runtime-corejs2/helpers/taggedTemplateLiteral');
|
|
47
45
|
|
|
48
|
-
function _interopDefaultLegacy (e) { return e && typeof e === 'object' && 'default' in e ? e : { 'default': e }; }
|
|
49
|
-
|
|
50
|
-
var _slicedToArray__default = /*#__PURE__*/_interopDefaultLegacy(_slicedToArray);
|
|
51
|
-
var _toConsumableArray__default = /*#__PURE__*/_interopDefaultLegacy(_toConsumableArray);
|
|
52
|
-
var fs__default = /*#__PURE__*/_interopDefaultLegacy(fs);
|
|
53
|
-
var inquirer__default = /*#__PURE__*/_interopDefaultLegacy(inquirer);
|
|
54
|
-
var ora__default = /*#__PURE__*/_interopDefaultLegacy(ora);
|
|
55
|
-
var _Reflect$ownKeys__default = /*#__PURE__*/_interopDefaultLegacy(_Reflect$ownKeys);
|
|
56
|
-
var _parseInt__default = /*#__PURE__*/_interopDefaultLegacy(_parseInt);
|
|
57
|
-
var URL__default = /*#__PURE__*/_interopDefaultLegacy(URL$1);
|
|
58
|
-
var _Object$keys__default = /*#__PURE__*/_interopDefaultLegacy(_Object$keys);
|
|
59
|
-
var _Object$getOwnPropertySymbols__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertySymbols);
|
|
60
|
-
var _Object$getOwnPropertyDescriptor__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptor);
|
|
61
|
-
var _Object$getOwnPropertyDescriptors__default = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptors);
|
|
62
|
-
var _Object$defineProperties__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperties);
|
|
63
|
-
var _Object$defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperty);
|
|
64
|
-
var _defineProperty__default = /*#__PURE__*/_interopDefaultLegacy(_defineProperty);
|
|
65
|
-
var _parseFloat__default = /*#__PURE__*/_interopDefaultLegacy(_parseFloat);
|
|
66
|
-
var iconv__default = /*#__PURE__*/_interopDefaultLegacy(iconv);
|
|
67
|
-
var _Set__default = /*#__PURE__*/_interopDefaultLegacy(_Set);
|
|
68
|
-
var _Array$from__default = /*#__PURE__*/_interopDefaultLegacy(_Array$from);
|
|
69
|
-
var _Symbol__default = /*#__PURE__*/_interopDefaultLegacy(_Symbol);
|
|
70
|
-
var _Symbol$iterator__default = /*#__PURE__*/_interopDefaultLegacy(_Symbol$iterator);
|
|
71
|
-
var _Array$isArray__default = /*#__PURE__*/_interopDefaultLegacy(_Array$isArray);
|
|
72
|
-
var _typeof__default = /*#__PURE__*/_interopDefaultLegacy(_typeof);
|
|
73
|
-
var _Object$create__default = /*#__PURE__*/_interopDefaultLegacy(_Object$create);
|
|
74
|
-
var _Object$freeze__default = /*#__PURE__*/_interopDefaultLegacy(_Object$freeze);
|
|
75
|
-
var require$$7__default = /*#__PURE__*/_interopDefaultLegacy(require$$7);
|
|
76
|
-
var require$$8__default = /*#__PURE__*/_interopDefaultLegacy(require$$8);
|
|
77
|
-
var require$$9__default = /*#__PURE__*/_interopDefaultLegacy(require$$9);
|
|
78
|
-
var require$$11__default = /*#__PURE__*/_interopDefaultLegacy(require$$11);
|
|
79
|
-
var require$$12__default = /*#__PURE__*/_interopDefaultLegacy(require$$12);
|
|
80
|
-
var require$$16__default = /*#__PURE__*/_interopDefaultLegacy(require$$16);
|
|
81
|
-
var require$$17__default = /*#__PURE__*/_interopDefaultLegacy(require$$17);
|
|
82
|
-
var require$$26__default = /*#__PURE__*/_interopDefaultLegacy(require$$26);
|
|
83
|
-
var require$$27__default = /*#__PURE__*/_interopDefaultLegacy(require$$27);
|
|
84
|
-
var require$$28__default = /*#__PURE__*/_interopDefaultLegacy(require$$28);
|
|
85
|
-
var require$$29__default = /*#__PURE__*/_interopDefaultLegacy(require$$29);
|
|
86
|
-
var require$$30__default = /*#__PURE__*/_interopDefaultLegacy(require$$30);
|
|
87
|
-
var require$$31__default = /*#__PURE__*/_interopDefaultLegacy(require$$31);
|
|
88
|
-
var require$$32__default = /*#__PURE__*/_interopDefaultLegacy(require$$32);
|
|
89
|
-
var require$$33__default = /*#__PURE__*/_interopDefaultLegacy(require$$33);
|
|
90
|
-
var require$$34__default = /*#__PURE__*/_interopDefaultLegacy(require$$34);
|
|
91
|
-
var require$$35__default = /*#__PURE__*/_interopDefaultLegacy(require$$35);
|
|
92
|
-
var _taggedTemplateLiteral__default = /*#__PURE__*/_interopDefaultLegacy(_taggedTemplateLiteral);
|
|
93
|
-
|
|
94
46
|
// Spacer images to be removed
|
|
95
47
|
|
|
96
48
|
// The class we will use to mark elements we want to keep
|
|
@@ -104,7 +56,7 @@ function getAttrs(node) {
|
|
|
104
56
|
var attribs = node.attribs,
|
|
105
57
|
attributes = node.attributes;
|
|
106
58
|
if (!attribs && attributes) {
|
|
107
|
-
var attrs = _Reflect$
|
|
59
|
+
var attrs = _Reflect$ownKeys(attributes).reduce(function (acc, index) {
|
|
108
60
|
var attr = attributes[index];
|
|
109
61
|
|
|
110
62
|
// In browser, Reflect.ownKeys includes non-numeric keys like 'length', 'item', etc.
|
|
@@ -144,7 +96,7 @@ function absolutize($, rootUrl, attr) {
|
|
|
144
96
|
var attrs = getAttrs(node);
|
|
145
97
|
var url = attrs[attr];
|
|
146
98
|
if (!url) return;
|
|
147
|
-
var absoluteUrl =
|
|
99
|
+
var absoluteUrl = URL$1.resolve(baseUrl || rootUrl, url);
|
|
148
100
|
setAttr(node, attr, absoluteUrl);
|
|
149
101
|
});
|
|
150
102
|
}
|
|
@@ -162,10 +114,10 @@ function absolutizeSet($, rootUrl, $content) {
|
|
|
162
114
|
// a candidate URL cannot start or end with a comma
|
|
163
115
|
// descriptors are separated from the URLs by unescaped whitespace
|
|
164
116
|
var parts = candidate.trim().replace(/,$/, '').split(/\s+/);
|
|
165
|
-
parts[0] =
|
|
117
|
+
parts[0] = URL$1.resolve(rootUrl, parts[0]);
|
|
166
118
|
return parts.join(' ');
|
|
167
119
|
});
|
|
168
|
-
var absoluteUrlSet =
|
|
120
|
+
var absoluteUrlSet = _toConsumableArray(new _Set(absoluteCandidates)).join(', ');
|
|
169
121
|
setAttr(node, 'srcset', absoluteUrlSet);
|
|
170
122
|
}
|
|
171
123
|
});
|
|
@@ -187,55 +139,48 @@ var hasRequiredMercury;
|
|
|
187
139
|
function requireMercury() {
|
|
188
140
|
if (hasRequiredMercury) return mercury;
|
|
189
141
|
hasRequiredMercury = 1;
|
|
190
|
-
var _Object$keys = _Object$
|
|
191
|
-
var _Object$getOwnPropertySymbols = _Object$
|
|
192
|
-
var _Object$getOwnPropertyDescriptor = _Object$
|
|
193
|
-
var _Object$getOwnPropertyDescriptors = _Object$
|
|
194
|
-
var _Object$defineProperties = _Object$
|
|
195
|
-
var _Object$defineProperty = _Object$
|
|
196
|
-
var _defineProperty =
|
|
197
|
-
var _objectWithoutProperties = require$$
|
|
198
|
-
var _asyncToGenerator = require$$
|
|
199
|
-
var _regeneratorRuntime = require$$
|
|
200
|
-
var URL$1 =
|
|
201
|
-
var TurndownService = require$$
|
|
202
|
-
var cheerio = require$$
|
|
203
|
-
var iconv =
|
|
204
|
-
var _parseInt =
|
|
205
|
-
var _slicedToArray =
|
|
206
|
-
var _Promise = require$$
|
|
207
|
-
var request = require$$
|
|
208
|
-
var _Reflect$ownKeys = _Reflect$
|
|
209
|
-
var _toConsumableArray =
|
|
210
|
-
var _parseFloat =
|
|
211
|
-
var _Set =
|
|
212
|
-
var _Array$from = _Array$
|
|
213
|
-
var _Symbol =
|
|
214
|
-
var _Symbol$iterator = _Symbol$
|
|
215
|
-
var _Array$isArray = _Array$
|
|
216
|
-
var _Object$assign = require$$
|
|
217
|
-
var stringDirection = require$$
|
|
218
|
-
var _Number$isNaN = require$$
|
|
219
|
-
var dayjs = require$$
|
|
220
|
-
var utc = require$$
|
|
221
|
-
var timezonePlugin = require$$
|
|
222
|
-
var customParseFormat = require$$
|
|
223
|
-
var wuzzy = require$$
|
|
224
|
-
var difflib = require$$
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
return e && _typeof__default["default"](e) === 'object' && 'default' in e ? e : {
|
|
228
|
-
'default': e
|
|
229
|
-
};
|
|
230
|
-
}
|
|
231
|
-
function _interopNamespace(e) {
|
|
232
|
-
if (e && e.__esModule) return e;
|
|
233
|
-
var n = _Object$create__default["default"](null);
|
|
142
|
+
var _Object$keys$1 = _Object$keys;
|
|
143
|
+
var _Object$getOwnPropertySymbols$1 = _Object$getOwnPropertySymbols;
|
|
144
|
+
var _Object$getOwnPropertyDescriptor$1 = _Object$getOwnPropertyDescriptor;
|
|
145
|
+
var _Object$getOwnPropertyDescriptors$1 = _Object$getOwnPropertyDescriptors;
|
|
146
|
+
var _Object$defineProperties$1 = _Object$defineProperties;
|
|
147
|
+
var _Object$defineProperty$1 = _Object$defineProperty;
|
|
148
|
+
var _defineProperty$1 = _defineProperty;
|
|
149
|
+
var _objectWithoutProperties = require$$7;
|
|
150
|
+
var _asyncToGenerator = require$$8;
|
|
151
|
+
var _regeneratorRuntime = require$$9;
|
|
152
|
+
var URL$1$1 = URL$1;
|
|
153
|
+
var TurndownService = require$$11;
|
|
154
|
+
var cheerio = require$$12;
|
|
155
|
+
var iconv$1 = iconv;
|
|
156
|
+
var _parseInt$1 = _parseInt;
|
|
157
|
+
var _slicedToArray$1 = _slicedToArray;
|
|
158
|
+
var _Promise = require$$16;
|
|
159
|
+
var request = require$$17;
|
|
160
|
+
var _Reflect$ownKeys$1 = _Reflect$ownKeys;
|
|
161
|
+
var _toConsumableArray$1 = _toConsumableArray;
|
|
162
|
+
var _parseFloat$1 = _parseFloat;
|
|
163
|
+
var _Set$1 = _Set;
|
|
164
|
+
var _Array$from$1 = _Array$from;
|
|
165
|
+
var _Symbol$1 = _Symbol;
|
|
166
|
+
var _Symbol$iterator$1 = _Symbol$iterator;
|
|
167
|
+
var _Array$isArray$1 = _Array$isArray;
|
|
168
|
+
var _Object$assign = require$$26;
|
|
169
|
+
var stringDirection = require$$27;
|
|
170
|
+
var _Number$isNaN = require$$28;
|
|
171
|
+
var dayjs = require$$29;
|
|
172
|
+
var utc = require$$30;
|
|
173
|
+
var timezonePlugin = require$$31;
|
|
174
|
+
var customParseFormat = require$$32;
|
|
175
|
+
var wuzzy = require$$33;
|
|
176
|
+
var difflib = require$$34;
|
|
177
|
+
function _interopNamespaceDefault(e) {
|
|
178
|
+
var n = _Object$create(null);
|
|
234
179
|
if (e) {
|
|
235
|
-
_Object$
|
|
180
|
+
_Object$keys(e).forEach(function (k) {
|
|
236
181
|
if (k !== 'default') {
|
|
237
|
-
var d = _Object$
|
|
238
|
-
_Object$
|
|
182
|
+
var d = _Object$getOwnPropertyDescriptor(e, k);
|
|
183
|
+
_Object$defineProperty(n, k, d.get ? d : {
|
|
239
184
|
enumerable: true,
|
|
240
185
|
get: function get() {
|
|
241
186
|
return e[k];
|
|
@@ -245,44 +190,9 @@ function requireMercury() {
|
|
|
245
190
|
});
|
|
246
191
|
}
|
|
247
192
|
n["default"] = e;
|
|
248
|
-
return _Object$
|
|
249
|
-
}
|
|
250
|
-
var
|
|
251
|
-
var _Object$getOwnPropertySymbols__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertySymbols);
|
|
252
|
-
var _Object$getOwnPropertyDescriptor__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptor);
|
|
253
|
-
var _Object$getOwnPropertyDescriptors__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$getOwnPropertyDescriptors);
|
|
254
|
-
var _Object$defineProperties__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperties);
|
|
255
|
-
var _Object$defineProperty__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Object$defineProperty);
|
|
256
|
-
var _defineProperty__default$1 = /*#__PURE__*/_interopDefaultLegacy(_defineProperty);
|
|
257
|
-
var _objectWithoutProperties__default = /*#__PURE__*/_interopDefaultLegacy(_objectWithoutProperties);
|
|
258
|
-
var _asyncToGenerator__default = /*#__PURE__*/_interopDefaultLegacy(_asyncToGenerator);
|
|
259
|
-
var _regeneratorRuntime__default = /*#__PURE__*/_interopDefaultLegacy(_regeneratorRuntime);
|
|
260
|
-
var URL__default$1 = /*#__PURE__*/_interopDefaultLegacy(URL$1);
|
|
261
|
-
var TurndownService__default = /*#__PURE__*/_interopDefaultLegacy(TurndownService);
|
|
262
|
-
var cheerio__namespace = /*#__PURE__*/_interopNamespace(cheerio);
|
|
263
|
-
var iconv__default$1 = /*#__PURE__*/_interopDefaultLegacy(iconv);
|
|
264
|
-
var _parseInt__default$1 = /*#__PURE__*/_interopDefaultLegacy(_parseInt);
|
|
265
|
-
var _slicedToArray__default$1 = /*#__PURE__*/_interopDefaultLegacy(_slicedToArray);
|
|
266
|
-
var _Promise__default = /*#__PURE__*/_interopDefaultLegacy(_Promise);
|
|
267
|
-
var request__default = /*#__PURE__*/_interopDefaultLegacy(request);
|
|
268
|
-
var _Reflect$ownKeys__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Reflect$ownKeys);
|
|
269
|
-
var _toConsumableArray__default$1 = /*#__PURE__*/_interopDefaultLegacy(_toConsumableArray);
|
|
270
|
-
var _parseFloat__default$1 = /*#__PURE__*/_interopDefaultLegacy(_parseFloat);
|
|
271
|
-
var _Set__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Set);
|
|
272
|
-
var _Array$from__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Array$from);
|
|
273
|
-
var _Symbol__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Symbol);
|
|
274
|
-
var _Symbol$iterator__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Symbol$iterator);
|
|
275
|
-
var _Array$isArray__default$1 = /*#__PURE__*/_interopDefaultLegacy(_Array$isArray);
|
|
276
|
-
var _Object$assign__default = /*#__PURE__*/_interopDefaultLegacy(_Object$assign);
|
|
277
|
-
var stringDirection__default = /*#__PURE__*/_interopDefaultLegacy(stringDirection);
|
|
278
|
-
var _Number$isNaN__default = /*#__PURE__*/_interopDefaultLegacy(_Number$isNaN);
|
|
279
|
-
var dayjs__default = /*#__PURE__*/_interopDefaultLegacy(dayjs);
|
|
280
|
-
var utc__default = /*#__PURE__*/_interopDefaultLegacy(utc);
|
|
281
|
-
var timezonePlugin__default = /*#__PURE__*/_interopDefaultLegacy(timezonePlugin);
|
|
282
|
-
var customParseFormat__default = /*#__PURE__*/_interopDefaultLegacy(customParseFormat);
|
|
283
|
-
var wuzzy__default = /*#__PURE__*/_interopDefaultLegacy(wuzzy);
|
|
284
|
-
var difflib__default = /*#__PURE__*/_interopDefaultLegacy(difflib);
|
|
285
|
-
var ellipsize__default = /*#__PURE__*/_interopDefaultLegacy(ellipsize);
|
|
193
|
+
return _Object$freeze(n);
|
|
194
|
+
}
|
|
195
|
+
var cheerio__namespace = /*#__PURE__*/_interopNamespaceDefault(cheerio);
|
|
286
196
|
var NORMALIZE_RE = /\s{2,}(?![^<>]*<\/(pre|code|textarea)>)/g;
|
|
287
197
|
function normalizeSpaces(text) {
|
|
288
198
|
return text.replace(NORMALIZE_RE, ' ').trim();
|
|
@@ -328,7 +238,7 @@ function requireMercury() {
|
|
|
328
238
|
function pageNumFromUrl(url) {
|
|
329
239
|
var matches = url.match(PAGE_IN_HREF_RE);
|
|
330
240
|
if (!matches) return null;
|
|
331
|
-
var pageNum =
|
|
241
|
+
var pageNum = _parseInt$1(matches[6], 10);
|
|
332
242
|
|
|
333
243
|
// Return pageNum < 100, otherwise
|
|
334
244
|
// return null
|
|
@@ -364,7 +274,7 @@ function requireMercury() {
|
|
|
364
274
|
// pagination data exists in it. Useful for comparing to other links
|
|
365
275
|
// that might have pagination data within them.
|
|
366
276
|
function articleBaseUrl(url, parsed) {
|
|
367
|
-
var parsedUrl = parsed ||
|
|
277
|
+
var parsedUrl = parsed || URL$1$1.parse(url);
|
|
368
278
|
var protocol = parsedUrl.protocol,
|
|
369
279
|
host = parsedUrl.host,
|
|
370
280
|
path = parsedUrl.path;
|
|
@@ -375,7 +285,7 @@ function requireMercury() {
|
|
|
375
285
|
// Split off and save anything that looks like a file type.
|
|
376
286
|
if (segment.includes('.')) {
|
|
377
287
|
var _segment$split = segment.split('.'),
|
|
378
|
-
_segment$split2 =
|
|
288
|
+
_segment$split2 = _slicedToArray$1(_segment$split, 2),
|
|
379
289
|
possibleSegment = _segment$split2[0],
|
|
380
290
|
fileExt = _segment$split2[1];
|
|
381
291
|
if (IS_ALPHA_RE.test(fileExt)) {
|
|
@@ -424,10 +334,10 @@ function requireMercury() {
|
|
|
424
334
|
var encoding = DEFAULT_ENCODING;
|
|
425
335
|
var matches = ENCODING_RE.exec(str);
|
|
426
336
|
if (matches !== null) {
|
|
427
|
-
var _matches =
|
|
337
|
+
var _matches = _slicedToArray$1(matches, 2);
|
|
428
338
|
str = _matches[1];
|
|
429
339
|
}
|
|
430
|
-
if (
|
|
340
|
+
if (iconv$1.encodingExists(str)) {
|
|
431
341
|
encoding = str;
|
|
432
342
|
}
|
|
433
343
|
return encoding;
|
|
@@ -453,11 +363,11 @@ function requireMercury() {
|
|
|
453
363
|
// for us to attempt parsing. Defaults to 5 MB.
|
|
454
364
|
var MAX_CONTENT_LENGTH = 5242880;
|
|
455
365
|
function ownKeys$h(e, r) {
|
|
456
|
-
var t = _Object$
|
|
457
|
-
if (_Object$
|
|
458
|
-
var o = _Object$
|
|
366
|
+
var t = _Object$keys$1(e);
|
|
367
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
368
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
459
369
|
r && (o = o.filter(function (r) {
|
|
460
|
-
return _Object$
|
|
370
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
461
371
|
})), t.push.apply(t, o);
|
|
462
372
|
}
|
|
463
373
|
return t;
|
|
@@ -465,17 +375,17 @@ function requireMercury() {
|
|
|
465
375
|
function _objectSpread$h(e) {
|
|
466
376
|
for (var r = 1; r < arguments.length; r++) {
|
|
467
377
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
468
|
-
r % 2 ? ownKeys$h(Object(t),
|
|
469
|
-
|
|
470
|
-
}) : _Object$
|
|
471
|
-
_Object$
|
|
378
|
+
r % 2 ? ownKeys$h(Object(t), true).forEach(function (r) {
|
|
379
|
+
_defineProperty$1(e, r, t[r]);
|
|
380
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$h(Object(t)).forEach(function (r) {
|
|
381
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
472
382
|
});
|
|
473
383
|
}
|
|
474
384
|
return e;
|
|
475
385
|
}
|
|
476
386
|
function get(options) {
|
|
477
|
-
return new
|
|
478
|
-
|
|
387
|
+
return new _Promise(function (resolve, reject) {
|
|
388
|
+
request(options, function (err, response, body) {
|
|
479
389
|
if (err) {
|
|
480
390
|
reject(err);
|
|
481
391
|
} else {
|
|
@@ -534,7 +444,7 @@ function requireMercury() {
|
|
|
534
444
|
return _fetchResource.apply(this, arguments);
|
|
535
445
|
}
|
|
536
446
|
function _fetchResource() {
|
|
537
|
-
_fetchResource =
|
|
447
|
+
_fetchResource = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee(url, parsedUrl) {
|
|
538
448
|
var headers,
|
|
539
449
|
options,
|
|
540
450
|
_yield$get,
|
|
@@ -542,11 +452,11 @@ function requireMercury() {
|
|
|
542
452
|
body,
|
|
543
453
|
_args = arguments,
|
|
544
454
|
_t;
|
|
545
|
-
return
|
|
455
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
546
456
|
while (1) switch (_context.prev = _context.next) {
|
|
547
457
|
case 0:
|
|
548
458
|
headers = _args.length > 2 && _args[2] !== undefined ? _args[2] : {};
|
|
549
|
-
parsedUrl = parsedUrl ||
|
|
459
|
+
parsedUrl = parsedUrl || URL$1$1.parse(encodeURI(url));
|
|
550
460
|
options = _objectSpread$h({
|
|
551
461
|
url: parsedUrl.href,
|
|
552
462
|
headers: _objectSpread$h(_objectSpread$h({}, REQUEST_HEADERS), headers),
|
|
@@ -805,7 +715,7 @@ function requireMercury() {
|
|
|
805
715
|
var attribs = node.attribs,
|
|
806
716
|
attributes = node.attributes;
|
|
807
717
|
if (!attribs && attributes) {
|
|
808
|
-
var attrs = _Reflect$
|
|
718
|
+
var attrs = _Reflect$ownKeys$1(attributes).reduce(function (acc, index) {
|
|
809
719
|
var attr = attributes[index];
|
|
810
720
|
|
|
811
721
|
// In browser, Reflect.ownKeys includes non-numeric keys like 'length', 'item', etc.
|
|
@@ -824,7 +734,7 @@ function requireMercury() {
|
|
|
824
734
|
return $;
|
|
825
735
|
}
|
|
826
736
|
var attrs = getAttrs(node) || {};
|
|
827
|
-
var attribString = _Reflect$
|
|
737
|
+
var attribString = _Reflect$ownKeys$1(attrs).map(function (key) {
|
|
828
738
|
return "".concat(key, "=").concat(attrs[key]);
|
|
829
739
|
}).join(' ');
|
|
830
740
|
var html;
|
|
@@ -881,8 +791,8 @@ function requireMercury() {
|
|
|
881
791
|
return $;
|
|
882
792
|
}
|
|
883
793
|
function cleanForHeight($img, $) {
|
|
884
|
-
var height =
|
|
885
|
-
var width =
|
|
794
|
+
var height = _parseInt$1($img.attr('height'), 10);
|
|
795
|
+
var width = _parseInt$1($img.attr('width'), 10) || 20;
|
|
886
796
|
|
|
887
797
|
// Remove images that explicitly have very small heights or
|
|
888
798
|
// widths, because they are most likely shims or icons,
|
|
@@ -920,10 +830,10 @@ function requireMercury() {
|
|
|
920
830
|
tags = KEEP_SELECTORS;
|
|
921
831
|
}
|
|
922
832
|
if (url) {
|
|
923
|
-
var _URL$parse =
|
|
833
|
+
var _URL$parse = URL$1$1.parse(url),
|
|
924
834
|
protocol = _URL$parse.protocol,
|
|
925
835
|
hostname = _URL$parse.hostname;
|
|
926
|
-
tags = [].concat(
|
|
836
|
+
tags = [].concat(_toConsumableArray$1(tags), ["iframe[src^=\"".concat(protocol, "//").concat(hostname, "\"]")]);
|
|
927
837
|
}
|
|
928
838
|
$(tags.join(','), article).addClass(KEEP_CLASS);
|
|
929
839
|
return $;
|
|
@@ -963,18 +873,18 @@ function requireMercury() {
|
|
|
963
873
|
while (node.attributes.length > 0) {
|
|
964
874
|
node.removeAttribute(node.attributes[0].name);
|
|
965
875
|
}
|
|
966
|
-
_Reflect$
|
|
876
|
+
_Reflect$ownKeys$1(attrs).forEach(function (key) {
|
|
967
877
|
node.setAttribute(key, attrs[key]);
|
|
968
878
|
});
|
|
969
879
|
}
|
|
970
880
|
return node;
|
|
971
881
|
}
|
|
972
882
|
function ownKeys$g(e, r) {
|
|
973
|
-
var t = _Object$
|
|
974
|
-
if (_Object$
|
|
975
|
-
var o = _Object$
|
|
883
|
+
var t = _Object$keys$1(e);
|
|
884
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
885
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
976
886
|
r && (o = o.filter(function (r) {
|
|
977
|
-
return _Object$
|
|
887
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
978
888
|
})), t.push.apply(t, o);
|
|
979
889
|
}
|
|
980
890
|
return t;
|
|
@@ -982,10 +892,10 @@ function requireMercury() {
|
|
|
982
892
|
function _objectSpread$g(e) {
|
|
983
893
|
for (var r = 1; r < arguments.length; r++) {
|
|
984
894
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
985
|
-
r % 2 ? ownKeys$g(Object(t),
|
|
986
|
-
|
|
987
|
-
}) : _Object$
|
|
988
|
-
_Object$
|
|
895
|
+
r % 2 ? ownKeys$g(Object(t), true).forEach(function (r) {
|
|
896
|
+
_defineProperty$1(e, r, t[r]);
|
|
897
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$g(Object(t)).forEach(function (r) {
|
|
898
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
989
899
|
});
|
|
990
900
|
}
|
|
991
901
|
return e;
|
|
@@ -993,9 +903,9 @@ function requireMercury() {
|
|
|
993
903
|
function removeAllButWhitelist($article, $) {
|
|
994
904
|
$article.find('*').each(function (index, node) {
|
|
995
905
|
var attrs = getAttrs(node);
|
|
996
|
-
setAttrs(node, _Reflect$
|
|
906
|
+
setAttrs(node, _Reflect$ownKeys$1(attrs).reduce(function (acc, attr) {
|
|
997
907
|
if (WHITELIST_ATTRS_RE.test(attr)) {
|
|
998
|
-
return _objectSpread$g(_objectSpread$g({}, acc), {},
|
|
908
|
+
return _objectSpread$g(_objectSpread$g({}, acc), {}, _defineProperty$1({}, attr, attrs[attr]));
|
|
999
909
|
}
|
|
1000
910
|
return acc;
|
|
1001
911
|
}, {}));
|
|
@@ -1025,7 +935,7 @@ function requireMercury() {
|
|
|
1025
935
|
// the node's score attribute
|
|
1026
936
|
// returns null if no score set
|
|
1027
937
|
function getScore($node) {
|
|
1028
|
-
return
|
|
938
|
+
return _parseFloat$1($node.attr('score')) || null;
|
|
1029
939
|
}
|
|
1030
940
|
function setScore($node, $, score) {
|
|
1031
941
|
$node.attr('score', score);
|
|
@@ -1207,7 +1117,7 @@ function requireMercury() {
|
|
|
1207
1117
|
return score;
|
|
1208
1118
|
}
|
|
1209
1119
|
|
|
1210
|
-
// eslint-disable-next-line import/no-cycle
|
|
1120
|
+
// eslint-disable-next-line import-x/no-cycle
|
|
1211
1121
|
function addScore($node, $, amount) {
|
|
1212
1122
|
try {
|
|
1213
1123
|
var score = getOrInitScore($node, $) + amount;
|
|
@@ -1218,7 +1128,7 @@ function requireMercury() {
|
|
|
1218
1128
|
return $node;
|
|
1219
1129
|
}
|
|
1220
1130
|
|
|
1221
|
-
// eslint-disable-next-line import/no-cycle
|
|
1131
|
+
// eslint-disable-next-line import-x/no-cycle
|
|
1222
1132
|
|
|
1223
1133
|
// Adds 1/4 of a child's score to its parent
|
|
1224
1134
|
function addToParent(node, $, score) {
|
|
@@ -1410,7 +1320,7 @@ function requireMercury() {
|
|
|
1410
1320
|
var attrs = getAttrs(node);
|
|
1411
1321
|
var url = attrs[attr];
|
|
1412
1322
|
if (!url) return;
|
|
1413
|
-
var absoluteUrl =
|
|
1323
|
+
var absoluteUrl = URL$1$1.resolve(baseUrl || rootUrl, url);
|
|
1414
1324
|
setAttr(node, attr, absoluteUrl);
|
|
1415
1325
|
});
|
|
1416
1326
|
}
|
|
@@ -1428,10 +1338,10 @@ function requireMercury() {
|
|
|
1428
1338
|
// a candidate URL cannot start or end with a comma
|
|
1429
1339
|
// descriptors are separated from the URLs by unescaped whitespace
|
|
1430
1340
|
var parts = candidate.trim().replace(/,$/, '').split(/\s+/);
|
|
1431
|
-
parts[0] =
|
|
1341
|
+
parts[0] = URL$1$1.resolve(rootUrl, parts[0]);
|
|
1432
1342
|
return parts.join(' ');
|
|
1433
1343
|
});
|
|
1434
|
-
var absoluteUrlSet =
|
|
1344
|
+
var absoluteUrlSet = _toConsumableArray$1(new _Set$1(absoluteCandidates)).join(', ');
|
|
1435
1345
|
setAttr(node, 'srcset', absoluteUrlSet);
|
|
1436
1346
|
}
|
|
1437
1347
|
});
|
|
@@ -1452,9 +1362,9 @@ function requireMercury() {
|
|
|
1452
1362
|
return cleanText === '' ? text : cleanText;
|
|
1453
1363
|
}
|
|
1454
1364
|
function _createForOfIteratorHelper$4(r, e) {
|
|
1455
|
-
var t = "undefined" != typeof
|
|
1365
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
1456
1366
|
if (!t) {
|
|
1457
|
-
if (_Array$
|
|
1367
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$4(r)) || e) {
|
|
1458
1368
|
t && (r = t);
|
|
1459
1369
|
var _n = 0,
|
|
1460
1370
|
F = function F() {};
|
|
@@ -1462,9 +1372,9 @@ function requireMercury() {
|
|
|
1462
1372
|
s: F,
|
|
1463
1373
|
n: function n() {
|
|
1464
1374
|
return _n >= r.length ? {
|
|
1465
|
-
done:
|
|
1375
|
+
done: true
|
|
1466
1376
|
} : {
|
|
1467
|
-
done:
|
|
1377
|
+
done: false,
|
|
1468
1378
|
value: r[_n++]
|
|
1469
1379
|
};
|
|
1470
1380
|
},
|
|
@@ -1477,8 +1387,8 @@ function requireMercury() {
|
|
|
1477
1387
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
1478
1388
|
}
|
|
1479
1389
|
var o,
|
|
1480
|
-
a =
|
|
1481
|
-
u =
|
|
1390
|
+
a = true,
|
|
1391
|
+
u = false;
|
|
1482
1392
|
return {
|
|
1483
1393
|
s: function s() {
|
|
1484
1394
|
t = t.call(r);
|
|
@@ -1488,7 +1398,7 @@ function requireMercury() {
|
|
|
1488
1398
|
return a = r.done, r;
|
|
1489
1399
|
},
|
|
1490
1400
|
e: function e(r) {
|
|
1491
|
-
u =
|
|
1401
|
+
u = true, o = r;
|
|
1492
1402
|
},
|
|
1493
1403
|
f: function f() {
|
|
1494
1404
|
try {
|
|
@@ -1503,7 +1413,7 @@ function requireMercury() {
|
|
|
1503
1413
|
if (r) {
|
|
1504
1414
|
if ("string" == typeof r) return _arrayLikeToArray$4(r, a);
|
|
1505
1415
|
var t = {}.toString.call(r).slice(8, -1);
|
|
1506
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
1416
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$4(r, a) : void 0;
|
|
1507
1417
|
}
|
|
1508
1418
|
}
|
|
1509
1419
|
function _arrayLikeToArray$4(r, a) {
|
|
@@ -1519,8 +1429,6 @@ function requireMercury() {
|
|
|
1519
1429
|
var foundNames = metaNames.filter(function (name) {
|
|
1520
1430
|
return cachedNames.indexOf(name) !== -1;
|
|
1521
1431
|
});
|
|
1522
|
-
|
|
1523
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
1524
1432
|
var _iterator = _createForOfIteratorHelper$4(foundNames),
|
|
1525
1433
|
_step;
|
|
1526
1434
|
try {
|
|
@@ -1550,7 +1458,7 @@ function requireMercury() {
|
|
|
1550
1458
|
if (cleanTags) {
|
|
1551
1459
|
metaValue = stripTags(values[0], $);
|
|
1552
1460
|
} else {
|
|
1553
|
-
var _values =
|
|
1461
|
+
var _values = _slicedToArray$1(values, 1);
|
|
1554
1462
|
metaValue = _values[0];
|
|
1555
1463
|
}
|
|
1556
1464
|
return {
|
|
@@ -1584,9 +1492,9 @@ function requireMercury() {
|
|
|
1584
1492
|
return commentParent !== undefined;
|
|
1585
1493
|
}
|
|
1586
1494
|
function _createForOfIteratorHelper$3(r, e) {
|
|
1587
|
-
var t = "undefined" != typeof
|
|
1495
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
1588
1496
|
if (!t) {
|
|
1589
|
-
if (_Array$
|
|
1497
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$3(r)) || e) {
|
|
1590
1498
|
t && (r = t);
|
|
1591
1499
|
var _n = 0,
|
|
1592
1500
|
F = function F() {};
|
|
@@ -1594,9 +1502,9 @@ function requireMercury() {
|
|
|
1594
1502
|
s: F,
|
|
1595
1503
|
n: function n() {
|
|
1596
1504
|
return _n >= r.length ? {
|
|
1597
|
-
done:
|
|
1505
|
+
done: true
|
|
1598
1506
|
} : {
|
|
1599
|
-
done:
|
|
1507
|
+
done: false,
|
|
1600
1508
|
value: r[_n++]
|
|
1601
1509
|
};
|
|
1602
1510
|
},
|
|
@@ -1609,8 +1517,8 @@ function requireMercury() {
|
|
|
1609
1517
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
1610
1518
|
}
|
|
1611
1519
|
var o,
|
|
1612
|
-
a =
|
|
1613
|
-
u =
|
|
1520
|
+
a = true,
|
|
1521
|
+
u = false;
|
|
1614
1522
|
return {
|
|
1615
1523
|
s: function s() {
|
|
1616
1524
|
t = t.call(r);
|
|
@@ -1620,7 +1528,7 @@ function requireMercury() {
|
|
|
1620
1528
|
return a = r.done, r;
|
|
1621
1529
|
},
|
|
1622
1530
|
e: function e(r) {
|
|
1623
|
-
u =
|
|
1531
|
+
u = true, o = r;
|
|
1624
1532
|
},
|
|
1625
1533
|
f: function f() {
|
|
1626
1534
|
try {
|
|
@@ -1635,7 +1543,7 @@ function requireMercury() {
|
|
|
1635
1543
|
if (r) {
|
|
1636
1544
|
if ("string" == typeof r) return _arrayLikeToArray$3(r, a);
|
|
1637
1545
|
var t = {}.toString.call(r).slice(8, -1);
|
|
1638
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
1546
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$3(r, a) : void 0;
|
|
1639
1547
|
}
|
|
1640
1548
|
}
|
|
1641
1549
|
function _arrayLikeToArray$3(r, a) {
|
|
@@ -1662,7 +1570,6 @@ function requireMercury() {
|
|
|
1662
1570
|
function extractFromSelectors($, selectors) {
|
|
1663
1571
|
var maxChildren = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 1;
|
|
1664
1572
|
var textOnly = arguments.length > 3 && arguments[3] !== undefined ? arguments[3] : true;
|
|
1665
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
1666
1573
|
var _iterator = _createForOfIteratorHelper$3(selectors),
|
|
1667
1574
|
_step;
|
|
1668
1575
|
try {
|
|
@@ -1729,7 +1636,7 @@ function requireMercury() {
|
|
|
1729
1636
|
};
|
|
1730
1637
|
$('img').each(function (_, img) {
|
|
1731
1638
|
var attrs = getAttrs(img);
|
|
1732
|
-
_Reflect$
|
|
1639
|
+
_Reflect$ownKeys$1(attrs).forEach(function (attr) {
|
|
1733
1640
|
var value = attrs[attr];
|
|
1734
1641
|
if (attr !== 'srcset' && IS_LINK.test(value) && IS_SRCSET.test(value)) {
|
|
1735
1642
|
$(img).attr('srcset', value);
|
|
@@ -1769,9 +1676,9 @@ function requireMercury() {
|
|
|
1769
1676
|
create: function create(url, preparedResponse, parsedUrl) {
|
|
1770
1677
|
var _arguments = arguments,
|
|
1771
1678
|
_this = this;
|
|
1772
|
-
return
|
|
1679
|
+
return _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
|
|
1773
1680
|
var headers, result, validResponse;
|
|
1774
|
-
return
|
|
1681
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
1775
1682
|
while (1) switch (_context.prev = _context.next) {
|
|
1776
1683
|
case 0:
|
|
1777
1684
|
headers = _arguments.length > 3 && _arguments[3] !== undefined ? _arguments[3] : {};
|
|
@@ -1851,7 +1758,7 @@ function requireMercury() {
|
|
|
1851
1758
|
}
|
|
1852
1759
|
var encoding = getEncoding(contentType);
|
|
1853
1760
|
// UTF-8 is handled natively by Node.js, skip iconv-lite
|
|
1854
|
-
var decodedContent = encoding === 'utf-8' ? content.toString('utf-8') :
|
|
1761
|
+
var decodedContent = encoding === 'utf-8' ? content.toString('utf-8') : iconv$1.decode(content, encoding);
|
|
1855
1762
|
var $ = cheerio__namespace.load(decodedContent);
|
|
1856
1763
|
// after first cheerio.load, check to see if encoding matches
|
|
1857
1764
|
var contentTypeSelector = isBrowser ? 'meta[http-equiv=content-type]' : 'meta[http-equiv=content-type i]';
|
|
@@ -1860,7 +1767,7 @@ function requireMercury() {
|
|
|
1860
1767
|
|
|
1861
1768
|
// if encodings in the header/body dont match, use the one in the body
|
|
1862
1769
|
if (metaContentType && properEncoding !== encoding) {
|
|
1863
|
-
decodedContent = properEncoding === 'utf-8' ? content.toString('utf-8') :
|
|
1770
|
+
decodedContent = properEncoding === 'utf-8' ? content.toString('utf-8') : iconv$1.decode(content, properEncoding);
|
|
1864
1771
|
$ = cheerio__namespace.load(decodedContent);
|
|
1865
1772
|
}
|
|
1866
1773
|
return $;
|
|
@@ -1869,8 +1776,8 @@ function requireMercury() {
|
|
|
1869
1776
|
function range() {
|
|
1870
1777
|
var start = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 1;
|
|
1871
1778
|
var end = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : 1;
|
|
1872
|
-
return /*#__PURE__*/
|
|
1873
|
-
return
|
|
1779
|
+
return /*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
|
|
1780
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
1874
1781
|
while (1) switch (_context.prev = _context.next) {
|
|
1875
1782
|
case 0:
|
|
1876
1783
|
if (!(start <= end)) {
|
|
@@ -1903,7 +1810,7 @@ function requireMercury() {
|
|
|
1903
1810
|
}, {});
|
|
1904
1811
|
};
|
|
1905
1812
|
function mergeSupportedDomains(extractor) {
|
|
1906
|
-
return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(
|
|
1813
|
+
return extractor.supportedDomains ? merge(extractor, [extractor.domain].concat(_toConsumableArray$1(extractor.supportedDomains))) : merge(extractor, [extractor.domain]);
|
|
1907
1814
|
}
|
|
1908
1815
|
var apiExtractors = {};
|
|
1909
1816
|
function addExtractor(extractor) {
|
|
@@ -1913,7 +1820,7 @@ function requireMercury() {
|
|
|
1913
1820
|
message: 'Unable to add custom extractor. Invalid parameters.'
|
|
1914
1821
|
};
|
|
1915
1822
|
}
|
|
1916
|
-
_Object$
|
|
1823
|
+
_Object$assign(apiExtractors, mergeSupportedDomains(extractor));
|
|
1917
1824
|
return apiExtractors;
|
|
1918
1825
|
}
|
|
1919
1826
|
var BalloonJuiceComExtractor = {
|
|
@@ -2614,7 +2521,7 @@ function requireMercury() {
|
|
|
2614
2521
|
var $parent = $node.parents('figure');
|
|
2615
2522
|
if (ytRe.test(thumb)) {
|
|
2616
2523
|
var _thumb$match = thumb.match(ytRe),
|
|
2617
|
-
_thumb$match2 =
|
|
2524
|
+
_thumb$match2 = _slicedToArray$1(_thumb$match, 2);
|
|
2618
2525
|
_thumb$match2[0];
|
|
2619
2526
|
var youtubeId = _thumb$match2[1]; // eslint-disable-line
|
|
2620
2527
|
$node.attr('src', "https://www.youtube.com/embed/".concat(youtubeId));
|
|
@@ -2637,7 +2544,7 @@ function requireMercury() {
|
|
|
2637
2544
|
// Remove any smaller images that did not get caught by the generic image
|
|
2638
2545
|
// cleaner (author photo 48px, leading sentence images 79px, etc.).
|
|
2639
2546
|
img: function img($node) {
|
|
2640
|
-
var width =
|
|
2547
|
+
var width = _parseInt$1($node.attr('width'), 10);
|
|
2641
2548
|
if (width < 100) $node.remove();
|
|
2642
2549
|
}
|
|
2643
2550
|
},
|
|
@@ -3505,7 +3412,7 @@ function requireMercury() {
|
|
|
3505
3412
|
// before it's consumable content? E.g., unusual lazy loaded images
|
|
3506
3413
|
transforms: {
|
|
3507
3414
|
'.pane-node-body': function paneNodeBody($node, $) {
|
|
3508
|
-
var _WwwMsnbcComExtractor =
|
|
3415
|
+
var _WwwMsnbcComExtractor = _slicedToArray$1(WwwMsnbcComExtractor.lead_image_url.selectors[0], 2),
|
|
3509
3416
|
selector = _WwwMsnbcComExtractor[0],
|
|
3510
3417
|
attr = _WwwMsnbcComExtractor[1];
|
|
3511
3418
|
var src = $(selector).attr(attr);
|
|
@@ -5577,7 +5484,7 @@ function requireMercury() {
|
|
|
5577
5484
|
'img[data-original]': function imgDataOriginal($node) {
|
|
5578
5485
|
var dataOriginal = $node.attr('data-original');
|
|
5579
5486
|
var src = $node.attr('src');
|
|
5580
|
-
var url =
|
|
5487
|
+
var url = URL$1$1.resolve(src, dataOriginal);
|
|
5581
5488
|
$node.attr('src', url);
|
|
5582
5489
|
}
|
|
5583
5490
|
},
|
|
@@ -5869,9 +5776,6 @@ function requireMercury() {
|
|
|
5869
5776
|
clean: []
|
|
5870
5777
|
}
|
|
5871
5778
|
};
|
|
5872
|
-
|
|
5873
|
-
/* eslint-disable no-nested-ternary */
|
|
5874
|
-
/* eslint-disable no-unused-expressions */
|
|
5875
5779
|
var WwwAbendblattDeExtractor = {
|
|
5876
5780
|
domain: 'www.abendblatt.de',
|
|
5877
5781
|
title: {
|
|
@@ -6470,11 +6374,11 @@ function requireMercury() {
|
|
|
6470
6374
|
}
|
|
6471
6375
|
};
|
|
6472
6376
|
function ownKeys$f(e, r) {
|
|
6473
|
-
var t = _Object$
|
|
6474
|
-
if (_Object$
|
|
6475
|
-
var o = _Object$
|
|
6377
|
+
var t = _Object$keys$1(e);
|
|
6378
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6379
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6476
6380
|
r && (o = o.filter(function (r) {
|
|
6477
|
-
return _Object$
|
|
6381
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6478
6382
|
})), t.push.apply(t, o);
|
|
6479
6383
|
}
|
|
6480
6384
|
return t;
|
|
@@ -6482,10 +6386,10 @@ function requireMercury() {
|
|
|
6482
6386
|
function _objectSpread$f(e) {
|
|
6483
6387
|
for (var r = 1; r < arguments.length; r++) {
|
|
6484
6388
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6485
|
-
r % 2 ? ownKeys$f(Object(t),
|
|
6486
|
-
|
|
6487
|
-
}) : _Object$
|
|
6488
|
-
_Object$
|
|
6389
|
+
r % 2 ? ownKeys$f(Object(t), true).forEach(function (r) {
|
|
6390
|
+
_defineProperty$1(e, r, t[r]);
|
|
6391
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$f(Object(t)).forEach(function (r) {
|
|
6392
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6489
6393
|
});
|
|
6490
6394
|
}
|
|
6491
6395
|
return e;
|
|
@@ -6494,11 +6398,11 @@ function requireMercury() {
|
|
|
6494
6398
|
domain: 'sport.se.pl'
|
|
6495
6399
|
});
|
|
6496
6400
|
function ownKeys$e(e, r) {
|
|
6497
|
-
var t = _Object$
|
|
6498
|
-
if (_Object$
|
|
6499
|
-
var o = _Object$
|
|
6401
|
+
var t = _Object$keys$1(e);
|
|
6402
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6403
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6500
6404
|
r && (o = o.filter(function (r) {
|
|
6501
|
-
return _Object$
|
|
6405
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6502
6406
|
})), t.push.apply(t, o);
|
|
6503
6407
|
}
|
|
6504
6408
|
return t;
|
|
@@ -6506,10 +6410,10 @@ function requireMercury() {
|
|
|
6506
6410
|
function _objectSpread$e(e) {
|
|
6507
6411
|
for (var r = 1; r < arguments.length; r++) {
|
|
6508
6412
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6509
|
-
r % 2 ? ownKeys$e(Object(t),
|
|
6510
|
-
|
|
6511
|
-
}) : _Object$
|
|
6512
|
-
_Object$
|
|
6413
|
+
r % 2 ? ownKeys$e(Object(t), true).forEach(function (r) {
|
|
6414
|
+
_defineProperty$1(e, r, t[r]);
|
|
6415
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$e(Object(t)).forEach(function (r) {
|
|
6416
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6513
6417
|
});
|
|
6514
6418
|
}
|
|
6515
6419
|
return e;
|
|
@@ -6545,11 +6449,11 @@ function requireMercury() {
|
|
|
6545
6449
|
}
|
|
6546
6450
|
};
|
|
6547
6451
|
function ownKeys$d(e, r) {
|
|
6548
|
-
var t = _Object$
|
|
6549
|
-
if (_Object$
|
|
6550
|
-
var o = _Object$
|
|
6452
|
+
var t = _Object$keys$1(e);
|
|
6453
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6454
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6551
6455
|
r && (o = o.filter(function (r) {
|
|
6552
|
-
return _Object$
|
|
6456
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6553
6457
|
})), t.push.apply(t, o);
|
|
6554
6458
|
}
|
|
6555
6459
|
return t;
|
|
@@ -6557,10 +6461,10 @@ function requireMercury() {
|
|
|
6557
6461
|
function _objectSpread$d(e) {
|
|
6558
6462
|
for (var r = 1; r < arguments.length; r++) {
|
|
6559
6463
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6560
|
-
r % 2 ? ownKeys$d(Object(t),
|
|
6561
|
-
|
|
6562
|
-
}) : _Object$
|
|
6563
|
-
_Object$
|
|
6464
|
+
r % 2 ? ownKeys$d(Object(t), true).forEach(function (r) {
|
|
6465
|
+
_defineProperty$1(e, r, t[r]);
|
|
6466
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$d(Object(t)).forEach(function (r) {
|
|
6467
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6564
6468
|
});
|
|
6565
6469
|
}
|
|
6566
6470
|
return e;
|
|
@@ -6569,11 +6473,11 @@ function requireMercury() {
|
|
|
6569
6473
|
domain: 'szczecin.se.pl'
|
|
6570
6474
|
});
|
|
6571
6475
|
function ownKeys$c(e, r) {
|
|
6572
|
-
var t = _Object$
|
|
6573
|
-
if (_Object$
|
|
6574
|
-
var o = _Object$
|
|
6476
|
+
var t = _Object$keys$1(e);
|
|
6477
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6478
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6575
6479
|
r && (o = o.filter(function (r) {
|
|
6576
|
-
return _Object$
|
|
6480
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6577
6481
|
})), t.push.apply(t, o);
|
|
6578
6482
|
}
|
|
6579
6483
|
return t;
|
|
@@ -6581,10 +6485,10 @@ function requireMercury() {
|
|
|
6581
6485
|
function _objectSpread$c(e) {
|
|
6582
6486
|
for (var r = 1; r < arguments.length; r++) {
|
|
6583
6487
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6584
|
-
r % 2 ? ownKeys$c(Object(t),
|
|
6585
|
-
|
|
6586
|
-
}) : _Object$
|
|
6587
|
-
_Object$
|
|
6488
|
+
r % 2 ? ownKeys$c(Object(t), true).forEach(function (r) {
|
|
6489
|
+
_defineProperty$1(e, r, t[r]);
|
|
6490
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$c(Object(t)).forEach(function (r) {
|
|
6491
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6588
6492
|
});
|
|
6589
6493
|
}
|
|
6590
6494
|
return e;
|
|
@@ -6593,11 +6497,11 @@ function requireMercury() {
|
|
|
6593
6497
|
domain: 'superbiz.se.pl'
|
|
6594
6498
|
});
|
|
6595
6499
|
function ownKeys$b(e, r) {
|
|
6596
|
-
var t = _Object$
|
|
6597
|
-
if (_Object$
|
|
6598
|
-
var o = _Object$
|
|
6500
|
+
var t = _Object$keys$1(e);
|
|
6501
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6502
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6599
6503
|
r && (o = o.filter(function (r) {
|
|
6600
|
-
return _Object$
|
|
6504
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6601
6505
|
})), t.push.apply(t, o);
|
|
6602
6506
|
}
|
|
6603
6507
|
return t;
|
|
@@ -6605,10 +6509,10 @@ function requireMercury() {
|
|
|
6605
6509
|
function _objectSpread$b(e) {
|
|
6606
6510
|
for (var r = 1; r < arguments.length; r++) {
|
|
6607
6511
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6608
|
-
r % 2 ? ownKeys$b(Object(t),
|
|
6609
|
-
|
|
6610
|
-
}) : _Object$
|
|
6611
|
-
_Object$
|
|
6512
|
+
r % 2 ? ownKeys$b(Object(t), true).forEach(function (r) {
|
|
6513
|
+
_defineProperty$1(e, r, t[r]);
|
|
6514
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$b(Object(t)).forEach(function (r) {
|
|
6515
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6612
6516
|
});
|
|
6613
6517
|
}
|
|
6614
6518
|
return e;
|
|
@@ -6638,11 +6542,11 @@ function requireMercury() {
|
|
|
6638
6542
|
}
|
|
6639
6543
|
};
|
|
6640
6544
|
function ownKeys$a(e, r) {
|
|
6641
|
-
var t = _Object$
|
|
6642
|
-
if (_Object$
|
|
6643
|
-
var o = _Object$
|
|
6545
|
+
var t = _Object$keys$1(e);
|
|
6546
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6547
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6644
6548
|
r && (o = o.filter(function (r) {
|
|
6645
|
-
return _Object$
|
|
6549
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6646
6550
|
})), t.push.apply(t, o);
|
|
6647
6551
|
}
|
|
6648
6552
|
return t;
|
|
@@ -6650,10 +6554,10 @@ function requireMercury() {
|
|
|
6650
6554
|
function _objectSpread$a(e) {
|
|
6651
6555
|
for (var r = 1; r < arguments.length; r++) {
|
|
6652
6556
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6653
|
-
r % 2 ? ownKeys$a(Object(t),
|
|
6654
|
-
|
|
6655
|
-
}) : _Object$
|
|
6656
|
-
_Object$
|
|
6557
|
+
r % 2 ? ownKeys$a(Object(t), true).forEach(function (r) {
|
|
6558
|
+
_defineProperty$1(e, r, t[r]);
|
|
6559
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$a(Object(t)).forEach(function (r) {
|
|
6560
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6657
6561
|
});
|
|
6658
6562
|
}
|
|
6659
6563
|
return e;
|
|
@@ -6662,11 +6566,11 @@ function requireMercury() {
|
|
|
6662
6566
|
domain: 'lodz.se.pl'
|
|
6663
6567
|
});
|
|
6664
6568
|
function ownKeys$9(e, r) {
|
|
6665
|
-
var t = _Object$
|
|
6666
|
-
if (_Object$
|
|
6667
|
-
var o = _Object$
|
|
6569
|
+
var t = _Object$keys$1(e);
|
|
6570
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6571
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6668
6572
|
r && (o = o.filter(function (r) {
|
|
6669
|
-
return _Object$
|
|
6573
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6670
6574
|
})), t.push.apply(t, o);
|
|
6671
6575
|
}
|
|
6672
6576
|
return t;
|
|
@@ -6674,10 +6578,10 @@ function requireMercury() {
|
|
|
6674
6578
|
function _objectSpread$9(e) {
|
|
6675
6579
|
for (var r = 1; r < arguments.length; r++) {
|
|
6676
6580
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6677
|
-
r % 2 ? ownKeys$9(Object(t),
|
|
6678
|
-
|
|
6679
|
-
}) : _Object$
|
|
6680
|
-
_Object$
|
|
6581
|
+
r % 2 ? ownKeys$9(Object(t), true).forEach(function (r) {
|
|
6582
|
+
_defineProperty$1(e, r, t[r]);
|
|
6583
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$9(Object(t)).forEach(function (r) {
|
|
6584
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6681
6585
|
});
|
|
6682
6586
|
}
|
|
6683
6587
|
return e;
|
|
@@ -6686,11 +6590,11 @@ function requireMercury() {
|
|
|
6686
6590
|
domain: 'wroclaw.se.pl'
|
|
6687
6591
|
});
|
|
6688
6592
|
function ownKeys$8(e, r) {
|
|
6689
|
-
var t = _Object$
|
|
6690
|
-
if (_Object$
|
|
6691
|
-
var o = _Object$
|
|
6593
|
+
var t = _Object$keys$1(e);
|
|
6594
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6595
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6692
6596
|
r && (o = o.filter(function (r) {
|
|
6693
|
-
return _Object$
|
|
6597
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6694
6598
|
})), t.push.apply(t, o);
|
|
6695
6599
|
}
|
|
6696
6600
|
return t;
|
|
@@ -6698,10 +6602,10 @@ function requireMercury() {
|
|
|
6698
6602
|
function _objectSpread$8(e) {
|
|
6699
6603
|
for (var r = 1; r < arguments.length; r++) {
|
|
6700
6604
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6701
|
-
r % 2 ? ownKeys$8(Object(t),
|
|
6702
|
-
|
|
6703
|
-
}) : _Object$
|
|
6704
|
-
_Object$
|
|
6605
|
+
r % 2 ? ownKeys$8(Object(t), true).forEach(function (r) {
|
|
6606
|
+
_defineProperty$1(e, r, t[r]);
|
|
6607
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$8(Object(t)).forEach(function (r) {
|
|
6608
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6705
6609
|
});
|
|
6706
6610
|
}
|
|
6707
6611
|
return e;
|
|
@@ -6710,11 +6614,11 @@ function requireMercury() {
|
|
|
6710
6614
|
domain: 'lublin.se.pl'
|
|
6711
6615
|
});
|
|
6712
6616
|
function ownKeys$7(e, r) {
|
|
6713
|
-
var t = _Object$
|
|
6714
|
-
if (_Object$
|
|
6715
|
-
var o = _Object$
|
|
6617
|
+
var t = _Object$keys$1(e);
|
|
6618
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
6619
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
6716
6620
|
r && (o = o.filter(function (r) {
|
|
6717
|
-
return _Object$
|
|
6621
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
6718
6622
|
})), t.push.apply(t, o);
|
|
6719
6623
|
}
|
|
6720
6624
|
return t;
|
|
@@ -6722,10 +6626,10 @@ function requireMercury() {
|
|
|
6722
6626
|
function _objectSpread$7(e) {
|
|
6723
6627
|
for (var r = 1; r < arguments.length; r++) {
|
|
6724
6628
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
6725
|
-
r % 2 ? ownKeys$7(Object(t),
|
|
6726
|
-
|
|
6727
|
-
}) : _Object$
|
|
6728
|
-
_Object$
|
|
6629
|
+
r % 2 ? ownKeys$7(Object(t), true).forEach(function (r) {
|
|
6630
|
+
_defineProperty$1(e, r, t[r]);
|
|
6631
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$7(Object(t)).forEach(function (r) {
|
|
6632
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
6729
6633
|
});
|
|
6730
6634
|
}
|
|
6731
6635
|
return e;
|
|
@@ -6975,7 +6879,7 @@ function requireMercury() {
|
|
|
6975
6879
|
img: function img($node) {
|
|
6976
6880
|
var srcset = $node.attr('srcset');
|
|
6977
6881
|
var _split = (srcset || '').split(','),
|
|
6978
|
-
_split2 =
|
|
6882
|
+
_split2 = _slicedToArray$1(_split, 1),
|
|
6979
6883
|
src = _split2[0];
|
|
6980
6884
|
if (src) {
|
|
6981
6885
|
$node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
|
|
@@ -7014,7 +6918,7 @@ function requireMercury() {
|
|
|
7014
6918
|
img: function img($node) {
|
|
7015
6919
|
var srcset = $node.attr('srcset');
|
|
7016
6920
|
var _split = (srcset || '').split(','),
|
|
7017
|
-
_split2 =
|
|
6921
|
+
_split2 = _slicedToArray$1(_split, 1),
|
|
7018
6922
|
src = _split2[0];
|
|
7019
6923
|
if (src) {
|
|
7020
6924
|
$node.parent().replaceWith("<figure><img srcset=\"".concat(srcset, "\" src=\"").concat(src, "\"/></figure>"));
|
|
@@ -7529,11 +7433,11 @@ function requireMercury() {
|
|
|
7529
7433
|
}
|
|
7530
7434
|
};
|
|
7531
7435
|
function ownKeys$6(e, r) {
|
|
7532
|
-
var t = _Object$
|
|
7533
|
-
if (_Object$
|
|
7534
|
-
var o = _Object$
|
|
7436
|
+
var t = _Object$keys$1(e);
|
|
7437
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
7438
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
7535
7439
|
r && (o = o.filter(function (r) {
|
|
7536
|
-
return _Object$
|
|
7440
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
7537
7441
|
})), t.push.apply(t, o);
|
|
7538
7442
|
}
|
|
7539
7443
|
return t;
|
|
@@ -7541,10 +7445,10 @@ function requireMercury() {
|
|
|
7541
7445
|
function _objectSpread$6(e) {
|
|
7542
7446
|
for (var r = 1; r < arguments.length; r++) {
|
|
7543
7447
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
7544
|
-
r % 2 ? ownKeys$6(Object(t),
|
|
7545
|
-
|
|
7546
|
-
}) : _Object$
|
|
7547
|
-
_Object$
|
|
7448
|
+
r % 2 ? ownKeys$6(Object(t), true).forEach(function (r) {
|
|
7449
|
+
_defineProperty$1(e, r, t[r]);
|
|
7450
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$6(Object(t)).forEach(function (r) {
|
|
7451
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
7548
7452
|
});
|
|
7549
7453
|
}
|
|
7550
7454
|
return e;
|
|
@@ -7552,212 +7456,445 @@ function requireMercury() {
|
|
|
7552
7456
|
var GrEuronewsComExtractor = _objectSpread$6(_objectSpread$6({}, WwwEuronewsComExtractor), {}, {
|
|
7553
7457
|
domain: 'gr.euronews.com'
|
|
7554
7458
|
});
|
|
7555
|
-
var
|
|
7459
|
+
var WwwIlfattoquotidianoItExtractor = {
|
|
7460
|
+
domain: 'www.ilfattoquotidiano.it',
|
|
7461
|
+
title: {
|
|
7462
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7463
|
+
},
|
|
7464
|
+
author: {
|
|
7465
|
+
selectors: ['.ifq-post__author .ifq-news-meta__author-name'],
|
|
7466
|
+
clean: ['span']
|
|
7467
|
+
},
|
|
7468
|
+
date_published: {
|
|
7469
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
7470
|
+
},
|
|
7471
|
+
dek: {
|
|
7472
|
+
selectors: [['meta[name="og:description"]', 'value']]
|
|
7473
|
+
},
|
|
7474
|
+
lead_image_url: {
|
|
7475
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7476
|
+
},
|
|
7477
|
+
content: {
|
|
7478
|
+
selectors: ['.ifq-post__content', 'article'],
|
|
7479
|
+
transforms: {},
|
|
7480
|
+
clean: []
|
|
7481
|
+
}
|
|
7482
|
+
};
|
|
7483
|
+
var ActualidadRtComExtractor = {
|
|
7484
|
+
domain: 'actualidad.rt.com',
|
|
7485
|
+
title: {
|
|
7486
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7487
|
+
},
|
|
7488
|
+
author: {
|
|
7489
|
+
selectors: [['meta[name="article:author"]', 'value']]
|
|
7490
|
+
},
|
|
7491
|
+
date_published: {
|
|
7492
|
+
selectors: [['meta[name="mediator_published_time"]', 'value']]
|
|
7493
|
+
},
|
|
7494
|
+
dek: {
|
|
7495
|
+
selectors: [['meta[name="og:description"]', 'value']]
|
|
7496
|
+
},
|
|
7497
|
+
lead_image_url: {
|
|
7498
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7499
|
+
},
|
|
7500
|
+
content: {
|
|
7501
|
+
selectors: ['.ArticleView-text'],
|
|
7502
|
+
transforms: {},
|
|
7503
|
+
// RT wraps each <img> in a <picture> whose <source> elements carry a
|
|
7504
|
+
// base64 placeholder srcset; browsers honor that over the real <img src>,
|
|
7505
|
+
// so drop the sources and let the <img> (real URL) render.
|
|
7506
|
+
clean: ['.ReadMore-root', 'source']
|
|
7507
|
+
}
|
|
7508
|
+
};
|
|
7509
|
+
var WwwTweaktownComExtractor = {
|
|
7510
|
+
domain: 'www.tweaktown.com',
|
|
7511
|
+
title: {
|
|
7512
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7513
|
+
},
|
|
7514
|
+
author: {
|
|
7515
|
+
selectors: ['.info-bar-div2 a[rel="author"]']
|
|
7516
|
+
},
|
|
7517
|
+
date_published: {
|
|
7518
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
7519
|
+
},
|
|
7520
|
+
dek: {
|
|
7521
|
+
selectors: [['meta[name="og:description"]', 'value']]
|
|
7522
|
+
},
|
|
7523
|
+
lead_image_url: {
|
|
7524
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7525
|
+
},
|
|
7526
|
+
content: {
|
|
7527
|
+
selectors: ['#article-body'],
|
|
7528
|
+
transforms: {},
|
|
7529
|
+
clean: []
|
|
7530
|
+
}
|
|
7531
|
+
};
|
|
7532
|
+
var WwwFrandroidComExtractor = {
|
|
7533
|
+
domain: 'www.frandroid.com',
|
|
7534
|
+
title: {
|
|
7535
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7536
|
+
},
|
|
7537
|
+
author: {
|
|
7538
|
+
selectors: [['meta[name="parsely-author"]', 'value']]
|
|
7539
|
+
},
|
|
7540
|
+
date_published: {
|
|
7541
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
7542
|
+
},
|
|
7543
|
+
dek: {
|
|
7544
|
+
selectors: [['meta[name="og:description"]', 'value']]
|
|
7545
|
+
},
|
|
7546
|
+
lead_image_url: {
|
|
7547
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7548
|
+
},
|
|
7549
|
+
content: {
|
|
7550
|
+
selectors: ['section.article-content'],
|
|
7551
|
+
transforms: {
|
|
7552
|
+
h2: function h2(node) {
|
|
7553
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
7554
|
+
},
|
|
7555
|
+
h3: function h3(node) {
|
|
7556
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
7557
|
+
}
|
|
7558
|
+
},
|
|
7559
|
+
clean: ['.index-menu-wrapper', '.is-gastric-kingfisher', '.newsletter-form', '.share', '.article-footer', '.js-feed-posts', '.optidigital-adslot', '[id^="optidigital-adslot"]']
|
|
7560
|
+
}
|
|
7561
|
+
};
|
|
7562
|
+
var WwwMotorsportComExtractor = {
|
|
7563
|
+
domain: 'www.motorsport.com',
|
|
7564
|
+
title: {
|
|
7565
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7566
|
+
},
|
|
7567
|
+
author: {
|
|
7568
|
+
selectors: ['.msnt-author-toolbar a[href*="/info/about-us/"]']
|
|
7569
|
+
},
|
|
7570
|
+
date_published: {
|
|
7571
|
+
selectors: [['meta[name="datePublished"]', 'value']]
|
|
7572
|
+
},
|
|
7573
|
+
dek: {
|
|
7574
|
+
selectors: ['h2.text-article-description']
|
|
7575
|
+
},
|
|
7576
|
+
lead_image_url: {
|
|
7577
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7578
|
+
},
|
|
7579
|
+
content: {
|
|
7580
|
+
selectors: ['.ms-article-content'],
|
|
7581
|
+
transforms: {
|
|
7582
|
+
h2: function h2(node) {
|
|
7583
|
+
return node.attr('class', 'mercury-parser-keep');
|
|
7584
|
+
}
|
|
7585
|
+
},
|
|
7586
|
+
clean: ['msnt-survey-promo', '.article-fullwidth-gallery_item ~ .article-fullwidth-gallery_item', '.ms-inarticle-widgets', '.relatedContent', '.ms-apb', '.ms-ap-native', '.outstream_partner']
|
|
7587
|
+
}
|
|
7588
|
+
};
|
|
7589
|
+
var SubstackComExtractor = {
|
|
7590
|
+
domain: 'substack.com',
|
|
7591
|
+
title: {
|
|
7592
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7593
|
+
},
|
|
7594
|
+
author: {
|
|
7595
|
+
selectors: [['meta[name="author"]', 'value']]
|
|
7596
|
+
},
|
|
7597
|
+
date_published: {
|
|
7598
|
+
selectors: [['meta[name="article:published_time"]', 'value']]
|
|
7599
|
+
},
|
|
7600
|
+
dek: {
|
|
7601
|
+
selectors: [['meta[name="og:description"]', 'value']]
|
|
7602
|
+
},
|
|
7603
|
+
lead_image_url: {
|
|
7604
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7605
|
+
},
|
|
7606
|
+
content: {
|
|
7607
|
+
selectors: ['.available-content'],
|
|
7608
|
+
transforms: {
|
|
7609
|
+
'div.captioned-image-container': 'figure',
|
|
7610
|
+
'div.image-link': function divImageLink($node) {
|
|
7611
|
+
$node.replaceWith($node.find('img'));
|
|
7612
|
+
}
|
|
7613
|
+
},
|
|
7614
|
+
clean: ['.subscribe-widget', '.subscription-widget-wrap', '.subscription-widget-wrap-editor', '.button-wrapper', '.poll-embed', '.share-dialog']
|
|
7615
|
+
}
|
|
7616
|
+
};
|
|
7617
|
+
var WwwDwComExtractor = {
|
|
7618
|
+
domain: 'www.dw.com',
|
|
7619
|
+
title: {
|
|
7620
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7621
|
+
},
|
|
7622
|
+
author: {
|
|
7623
|
+
selectors: ['.author-name .author-link']
|
|
7624
|
+
},
|
|
7625
|
+
date_published: {
|
|
7626
|
+
selectors: [['meta[name="date"]', 'value']]
|
|
7627
|
+
},
|
|
7628
|
+
dek: {
|
|
7629
|
+
selectors: [['meta[name="og:description"]', 'value']]
|
|
7630
|
+
},
|
|
7631
|
+
lead_image_url: {
|
|
7632
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7633
|
+
},
|
|
7634
|
+
content: {
|
|
7635
|
+
selectors: ['[data-tracking-name="rich-text"]'],
|
|
7636
|
+
transforms: {
|
|
7637
|
+
// DW inline images are responsive: the real template lives in data-url
|
|
7638
|
+
// with a literal ${formatId} size token that JS would replace, leaving a
|
|
7639
|
+
// broken src in the raw HTML. Resolve it to a standard content size.
|
|
7640
|
+
img: function img(node) {
|
|
7641
|
+
var template = node.attr('data-url') || node.attr('src') || '';
|
|
7642
|
+
if (template.includes('${formatId}')) {
|
|
7643
|
+
node.attr('src', template.replace('${formatId}', '6'));
|
|
7644
|
+
}
|
|
7645
|
+
}
|
|
7646
|
+
},
|
|
7647
|
+
// Embedded tweets are non-functional fallback markup without JS.
|
|
7648
|
+
clean: ['blockquote.tweet.embed']
|
|
7649
|
+
}
|
|
7650
|
+
};
|
|
7651
|
+
var WwwAnimenewsnetworkComExtractor = {
|
|
7652
|
+
domain: 'www.animenewsnetwork.com',
|
|
7653
|
+
title: {
|
|
7654
|
+
selectors: [['meta[name="og:title"]', 'value']]
|
|
7655
|
+
},
|
|
7656
|
+
author: null,
|
|
7657
|
+
date_published: {
|
|
7658
|
+
selectors: [['small time', 'datetime']]
|
|
7659
|
+
},
|
|
7660
|
+
dek: {
|
|
7661
|
+
selectors: [['meta[name="description"]', 'value']]
|
|
7662
|
+
},
|
|
7663
|
+
lead_image_url: {
|
|
7664
|
+
selectors: [['meta[name="og:image"]', 'value']]
|
|
7665
|
+
},
|
|
7666
|
+
content: {
|
|
7667
|
+
selectors: ['.KonaBody'],
|
|
7668
|
+
transforms: {
|
|
7669
|
+
// Images are lazy-loaded: real URL in data-src, a spacer.gif in src.
|
|
7670
|
+
// Promote data-src so the images survive cleaning and render.
|
|
7671
|
+
img: function img(node) {
|
|
7672
|
+
var dataSrc = node.attr('data-src');
|
|
7673
|
+
if (dataSrc) {
|
|
7674
|
+
var src = dataSrc.startsWith('/') ? "https://www.animenewsnetwork.com".concat(dataSrc) : dataSrc;
|
|
7675
|
+
node.attr('src', src);
|
|
7676
|
+
node.removeAttr('data-src');
|
|
7677
|
+
}
|
|
7678
|
+
}
|
|
7679
|
+
},
|
|
7680
|
+
// .intro duplicates the dek; instaread-player is an audio widget.
|
|
7681
|
+
clean: ['.intro', 'instaread-player']
|
|
7682
|
+
}
|
|
7683
|
+
};
|
|
7684
|
+
// Namespace object holding every bundled site-specific extractor, keyed by the
// extractor's variable name. It is created with a null prototype and frozen,
// so it contains only the entries listed here and cannot be changed later.
// The `Extractors` table is built by iterating this object's keys, so the
// order of the entries below is the order in which extractors are merged.
var CustomExtractors = /*#__PURE__*/_Object$freeze({
  __proto__: null,
  AbcnewsGoComExtractor: AbcnewsGoComExtractor,
  ActualidadRtComExtractor: ActualidadRtComExtractor,
  ApartmentTherapyExtractor: ApartmentTherapyExtractor,
  ArstechnicaComExtractor: ArstechnicaComExtractor,
  BalloonJuiceComExtractor: BalloonJuiceComExtractor,
  BialystokSePlExtractor: BialystokSePlExtractor,
  BiorxivOrgExtractor: BiorxivOrgExtractor,
  BlisterreviewComExtractor: BlisterreviewComExtractor,
  BloggerExtractor: BloggerExtractor,
  BookwalkerJpExtractor: BookwalkerJpExtractor,
  BroadwayWorldExtractor: BroadwayWorldExtractor,
  BskyAppExtractor: BskyAppExtractor,
  BuzzapJpExtractor: BuzzapJpExtractor,
  BuzzfeedExtractor: BuzzfeedExtractor,
  ChicagoyimbyComExtractor: ChicagoyimbyComExtractor,
  ClinicaltrialsGovExtractor: ClinicaltrialsGovExtractor,
  DeadlineComExtractor: DeadlineComExtractor,
  DeadspinExtractor: DeadspinExtractor,
  EconomictimesIndiatimesComExtractor: EconomictimesIndiatimesComExtractor,
  EpaperZeitDeExtractor: EpaperZeitDeExtractor,
  FactorioComExtractor: FactorioComExtractor,
  FortuneComExtractor: FortuneComExtractor,
  ForwardComExtractor: ForwardComExtractor,
  GeniusComExtractor: GeniusComExtractor,
  GetnewsJpExtractor: GetnewsJpExtractor,
  GithubComExtractor: GithubComExtractor,
  GonintendoComExtractor: GonintendoComExtractor,
  GothamistComExtractor: GothamistComExtractor,
  GrEuronewsComExtractor: GrEuronewsComExtractor,
  HellogigglesComExtractor: HellogigglesComExtractor,
  IciRadioCanadaCaExtractor: IciRadioCanadaCaExtractor,
  JapanCnetComExtractor: JapanCnetComExtractor,
  JapanZdnetComExtractor: JapanZdnetComExtractor,
  JvndbJvnJpExtractor: JvndbJvnJpExtractor,
  LittleThingsExtractor: LittleThingsExtractor,
  LodzSePlExtractor: LodzSePlExtractor,
  LublinSePlExtractor: LublinSePlExtractor,
  MSNExtractor: MSNExtractor,
  MaTtiasBeExtractor: MaTtiasBeExtractor,
  MashableComExtractor: MashableComExtractor,
  MediumExtractor: MediumExtractor,
  MobilesyrupComExtractor: MobilesyrupComExtractor,
  MoneyCnnComExtractor: MoneyCnnComExtractor,
  NYMagExtractor: NYMagExtractor,
  NYTimesExtractor: NYTimesExtractor,
  NewYorkerExtractor: NewYorkerExtractor,
  NewrepublicComExtractor: NewrepublicComExtractor,
  NewsMynaviJpExtractor: NewsMynaviJpExtractor,
  NewsNationalgeographicComExtractor: NewsNationalgeographicComExtractor,
  NewsPtsOrgTwExtractor: NewsPtsOrgTwExtractor,
  Nineto5googleComExtractor: Nineto5googleComExtractor,
  Nineto5linuxComExtractor: Nineto5linuxComExtractor,
  Nineto5macComExtractor: Nineto5macComExtractor,
  ObamawhitehouseArchivesGovExtractor: ObamawhitehouseArchivesGovExtractor,
  ObserverComExtractor: ObserverComExtractor,
  OrfAtExtractor: OrfAtExtractor,
  OtrsComExtractor: OtrsComExtractor,
  PagesixComExtractor: PagesixComExtractor,
  PastebinComExtractor: PastebinComExtractor,
  PeopleComExtractor: PeopleComExtractor,
  PhpspotOrgExtractor: PhpspotOrgExtractor,
  PitchforkComExtractor: PitchforkComExtractor,
  PoliticoExtractor: PoliticoExtractor,
  PolitykaSePlExtractor: PolitykaSePlExtractor,
  PolskisamorzadSePlExtractor: PolskisamorzadSePlExtractor,
  PortalobronnySePlExtractor: PortalobronnySePlExtractor,
  QzComExtractor: QzComExtractor,
  ScanNetsecurityNeJpExtractor: ScanNetsecurityNeJpExtractor,
  ScienceflyComExtractor: ScienceflyComExtractor,
  SectIijAdJpExtractor: SectIijAdJpExtractor,
  SgNewsYahooComExtractor: SgNewsYahooComExtractor,
  SpektrumExtractor: SpektrumExtractor,
  SportSePlExtractor: SportSePlExtractor,
  SubstackComExtractor: SubstackComExtractor,
  SuperbizSePlExtractor: SuperbizSePlExtractor,
  SuperserialeSePlExtractor: SuperserialeSePlExtractor,
  SzczecinSePlExtractor: SzczecinSePlExtractor,
  TakagihiromitsuJpExtractor: TakagihiromitsuJpExtractor,
  TarnkappeInfoExtractor: TarnkappeInfoExtractor,
  TechcrunchComExtractor: TechcrunchComExtractor,
  TechlogIijAdJpExtractor: TechlogIijAdJpExtractor,
  TerminaltroveComExtractor: TerminaltroveComExtractor,
  TheAtlanticExtractor: TheAtlanticExtractor,
  ThefederalistpapersOrgExtractor: ThefederalistpapersOrgExtractor,
  ThoughtcatalogComExtractor: ThoughtcatalogComExtractor,
  TimesofindiaIndiatimesComExtractor: TimesofindiaIndiatimesComExtractor,
  TldrTechExtractor: TldrTechExtractor,
  TwitterExtractor: TwitterExtractor,
  UproxxComExtractor: UproxxComExtractor,
  WccftechComExtractor: WccftechComExtractor,
  WeeklyAsciiJpExtractor: WeeklyAsciiJpExtractor,
  WikiaExtractor: WikiaExtractor,
  WikipediaExtractor: WikipediaExtractor,
  WiredExtractor: WiredExtractor,
  WiredJpExtractor: WiredJpExtractor,
  WroclawSePlExtractor: WroclawSePlExtractor,
  Www1pezeshkComExtractor: Www1pezeshkComExtractor,
  WwwAbendblattDeExtractor: WwwAbendblattDeExtractor,
  WwwAlComExtractor: WwwAlComExtractor,
  WwwAmericanowComExtractor: WwwAmericanowComExtractor,
  WwwAndroidauthorityComExtractor: WwwAndroidauthorityComExtractor,
  WwwAndroidcentralComExtractor: WwwAndroidcentralComExtractor,
  WwwAnimenewsnetworkComExtractor: WwwAnimenewsnetworkComExtractor,
  WwwAolComExtractor: WwwAolComExtractor,
  WwwAsahiComExtractor: WwwAsahiComExtractor,
  WwwBlickDeExtractor: WwwBlickDeExtractor,
  WwwBloombergComExtractor: WwwBloombergComExtractor,
  WwwBustleComExtractor: WwwBustleComExtractor,
  WwwCbcCaExtractor: WwwCbcCaExtractor,
  WwwCbssportsComExtractor: WwwCbssportsComExtractor,
  WwwChannelnewsasiaComExtractor: WwwChannelnewsasiaComExtractor,
  WwwChicagotribuneComExtractor: WwwChicagotribuneComExtractor,
  WwwCnbcComExtractor: WwwCnbcComExtractor,
  WwwCnetComExtractor: WwwCnetComExtractor,
  WwwCnnComExtractor: WwwCnnComExtractor,
  WwwDmagazineComExtractor: WwwDmagazineComExtractor,
  WwwDwComExtractor: WwwDwComExtractor,
  WwwElecomCoJpExtractor: WwwElecomCoJpExtractor,
  WwwEngadgetComExtractor: WwwEngadgetComExtractor,
  WwwEonlineComExtractor: WwwEonlineComExtractor,
  WwwEuronewsComExtractor: WwwEuronewsComExtractor,
  WwwFastcompanyComExtractor: WwwFastcompanyComExtractor,
  WwwFlatpanelshdComExtractor: WwwFlatpanelshdComExtractor,
  WwwFoolComExtractor: WwwFoolComExtractor,
  WwwFortinetComExtractor: WwwFortinetComExtractor,
  WwwFrandroidComExtractor: WwwFrandroidComExtractor,
  WwwFuturaSciencesComExtractor: WwwFuturaSciencesComExtractor,
  WwwGizmodoJpExtractor: WwwGizmodoJpExtractor,
  WwwGrueneDeExtractor: WwwGrueneDeExtractor,
  WwwHardwarezoneComSgExtractor: WwwHardwarezoneComSgExtractor,
  WwwHeiseDeExtractor: WwwHeiseDeExtractor,
  WwwHuffingtonpostComExtractor: WwwHuffingtonpostComExtractor,
  WwwIlfattoquotidianoItExtractor: WwwIlfattoquotidianoItExtractor,
  WwwInfoqComExtractor: WwwInfoqComExtractor,
  WwwInquisitrComExtractor: WwwInquisitrComExtractor,
  WwwInvestmentexecutiveComExtractor: WwwInvestmentexecutiveComExtractor,
  WwwIpaGoJpExtractor: WwwIpaGoJpExtractor,
  WwwItmediaCoJpExtractor: WwwItmediaCoJpExtractor,
  WwwJalopnikComExtractor: WwwJalopnikComExtractor,
  WwwJnsaOrgExtractor: WwwJnsaOrgExtractor,
  WwwLadbibleComExtractor: WwwLadbibleComExtractor,
  WwwLatimesComExtractor: WwwLatimesComExtractor,
  WwwLebensmittelwarnungDeExtractor: WwwLebensmittelwarnungDeExtractor,
  WwwLemondeFrExtractor: WwwLemondeFrExtractor,
  WwwLifehackerJpExtractor: WwwLifehackerJpExtractor,
  WwwLinkedinComExtractor: WwwLinkedinComExtractor,
  WwwMacrumorsComExtractor: WwwMacrumorsComExtractor,
  WwwMentalflossComExtractor: WwwMentalflossComExtractor,
  WwwMiamiheraldComExtractor: WwwMiamiheraldComExtractor,
  WwwMoongiftJpExtractor: WwwMoongiftJpExtractor,
  WwwMotorsportComExtractor: WwwMotorsportComExtractor,
  WwwMsnbcComExtractor: WwwMsnbcComExtractor,
  WwwNationalgeographicComExtractor: WwwNationalgeographicComExtractor,
  WwwNbcnewsComExtractor: WwwNbcnewsComExtractor,
  WwwNdtvComExtractor: WwwNdtvComExtractor,
  WwwNotebookcheckNetExtractor: WwwNotebookcheckNetExtractor,
  WwwNprOrgExtractor: WwwNprOrgExtractor,
  WwwNtvDeExtractor: WwwNtvDeExtractor,
  WwwNumeramaComExtractor: WwwNumeramaComExtractor,
  WwwNydailynewsComExtractor: WwwNydailynewsComExtractor,
  WwwOpposingviewsComExtractor: WwwOpposingviewsComExtractor,
  WwwOreillyCoJpExtractor: WwwOreillyCoJpExtractor,
  WwwOssnewsJpExtractor: WwwOssnewsJpExtractor,
  WwwPhoronixComExtractor: WwwPhoronixComExtractor,
  WwwPolygonComExtractor: WwwPolygonComExtractor,
  WwwPopsugarComExtractor: WwwPopsugarComExtractor,
  WwwProspectmagazineCoUkExtractor: WwwProspectmagazineCoUkExtractor,
  WwwPublickey1JpExtractor: WwwPublickey1JpExtractor,
  WwwQbitaiComExtractor: WwwQbitaiComExtractor,
  WwwQdailyComExtractor: WwwQdailyComExtractor,
  WwwRawstoryComExtractor: WwwRawstoryComExtractor,
  WwwRbbtodayComExtractor: WwwRbbtodayComExtractor,
  WwwRecodeNetExtractor: WwwRecodeNetExtractor,
  WwwRedditComExtractor: WwwRedditComExtractor,
  WwwRefinery29ComExtractor: WwwRefinery29ComExtractor,
  WwwReutersComExtractor: WwwReutersComExtractor,
  WwwRollingstoneComExtractor: WwwRollingstoneComExtractor,
  WwwSanwaCoJpExtractor: WwwSanwaCoJpExtractor,
  WwwSbnationComExtractor: WwwSbnationComExtractor,
  WwwSePlExtractor: WwwSePlExtractor,
  WwwSiComExtractor: WwwSiComExtractor,
  WwwSlateComExtractor: WwwSlateComExtractor,
  WwwSpiegelDeExtractor: WwwSpiegelDeExtractor,
  WwwTagesschauDeExtractor: WwwTagesschauDeExtractor,
  WwwTechpowerupComExtractor: WwwTechpowerupComExtractor,
  WwwThedriveComExtractor: WwwThedriveComExtractor,
  WwwTheguardianComExtractor: WwwTheguardianComExtractor,
  WwwThepennyhoarderComExtractor: WwwThepennyhoarderComExtractor,
  WwwThepoliticalinsiderComExtractor: WwwThepoliticalinsiderComExtractor,
  WwwThevergeComExtractor: WwwThevergeComExtractor,
  WwwTmzComExtractor: WwwTmzComExtractor,
  WwwTodayComExtractor: WwwTodayComExtractor,
  WwwTransfermarktDeExtractor: WwwTransfermarktDeExtractor,
  WwwTweaktownComExtractor: WwwTweaktownComExtractor,
  WwwUsmagazineComExtractor: WwwUsmagazineComExtractor,
  WwwVersantsComExtractor: WwwVersantsComExtractor,
  WwwVideogameschronicleComExtractor: WwwVideogameschronicleComExtractor,
  WwwVortezNetExtractor: WwwVortezNetExtractor,
  WwwVoxComExtractor: WwwVoxComExtractor,
  WwwWashingtonpostComExtractor: WwwWashingtonpostComExtractor,
  WwwWesternjournalismComExtractor: WwwWesternjournalismComExtractor,
  WwwYomiuriCoJpExtractor: WwwYomiuriCoJpExtractor,
  WwwYoutubeComExtractor: WwwYoutubeComExtractor,
  YahooExtractor: YahooExtractor,
  twofortysevensportsComExtractor: twofortysevensportsComExtractor
});
|
|
7755
7892
|
function ownKeys$5(e, r) {
|
|
7756
|
-
var t = _Object$
|
|
7757
|
-
if (_Object$
|
|
7758
|
-
var o = _Object$
|
|
7893
|
+
var t = _Object$keys$1(e);
|
|
7894
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
7895
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
7759
7896
|
r && (o = o.filter(function (r) {
|
|
7760
|
-
return _Object$
|
|
7897
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
7761
7898
|
})), t.push.apply(t, o);
|
|
7762
7899
|
}
|
|
7763
7900
|
return t;
|
|
@@ -7765,15 +7902,15 @@ function requireMercury() {
|
|
|
7765
7902
|
// Object-spread helper: copies the properties of every argument after the
// first onto `e` and returns `e`. A null/undefined source counts as `{}`.
//  - Odd-positioned sources: each key reported by ownKeys$5(source, true) is
//    written with _defineProperty$1 using the source's current value.
//  - Even-positioned sources: property descriptors are copied, in bulk when
//    getOwnPropertyDescriptors is available, otherwise one key at a time.
function _objectSpread$5(e) {
  // Copy by value through _defineProperty$1.
  function assignValues(source) {
    ownKeys$5(Object(source), true).forEach(function (key) {
      _defineProperty$1(e, key, source[key]);
    });
  }

  // Copy full property descriptors.
  function assignDescriptors(source) {
    if (_Object$getOwnPropertyDescriptors$1) {
      _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(source));
      return;
    }
    ownKeys$5(Object(source)).forEach(function (key) {
      _Object$defineProperty$1(e, key, _Object$getOwnPropertyDescriptor$1(source, key));
    });
  }

  for (var index = 1; index < arguments.length; index++) {
    var source = arguments[index] == null ? {} : arguments[index];
    if (index % 2) {
      assignValues(source);
    } else {
      assignDescriptors(source);
    }
  }
  return e;
}
|
|
7776
|
-
var Extractors = _Object$
|
|
7913
|
+
// Merged table built from every entry of CustomExtractors: for each key, in
// key order, the result of mergeSupportedDomains(extractor) is spread on top
// of a fresh copy of what has been accumulated so far.
var Extractors = function () {
  var merged = {};
  var extractorNames = _Object$keys$1(CustomExtractors);
  for (var i = 0; i < extractorNames.length; i += 1) {
    var extractor = CustomExtractors[extractorNames[i]];
    merged = _objectSpread$5(_objectSpread$5({}, merged), mergeSupportedDomains(extractor));
  }
  return merged;
}();
|
|
@@ -7841,9 +7978,9 @@ function requireMercury() {
|
|
|
7841
7978
|
if (TEXT_LINK_RE.test(dekText)) return null;
|
|
7842
7979
|
return normalizeSpaces(dekText.trim());
|
|
7843
7980
|
}
|
|
7844
|
-
|
|
7845
|
-
|
|
7846
|
-
|
|
7981
|
+
dayjs.extend(utc);
|
|
7982
|
+
dayjs.extend(timezonePlugin);
|
|
7983
|
+
dayjs.extend(customParseFormat);
|
|
7847
7984
|
var TIMEZONE_ABBR_RE = /\b(EST|EDT|CST|CDT|MST|MDT|PST|PDT|ET|CT|MT|PT|GMT|UTC)\b/gi;
|
|
7848
7985
|
// Check if string contains timezone offset info (e.g., +0000, GMT+0000, Z)
|
|
7849
7986
|
var HAS_TIMEZONE_RE = /([+-]\d{2}:?\d{2}|Z|\bGMT[+-]\d+|\bUTC\b)/i;
|
|
@@ -7863,53 +8000,53 @@ function requireMercury() {
|
|
|
7863
8000
|
}
|
|
7864
8001
|
// Turn a date string into a dayjs object, trying progressively looser
// strategies. The returned object may be invalid; callers check isValid().
//
// dateString - the raw date text
// timezone   - optional zone applied when the string carries no zone info
// format     - optional parse format (passed through stripTimezoneFromFormat)
function createDate(dateString, timezone, format) {
  // Strings matching TIME_WITH_OFFSET_RE go straight to the native parser.
  if (TIME_WITH_OFFSET_RE.test(dateString)) {
    return dayjs(new Date(dateString));
  }

  // Relative strings: subtract the captured amount/unit from "now".
  if (TIME_AGO_STRING.test(dateString)) {
    var agoMatch = TIME_AGO_STRING.exec(dateString);
    var amount = agoMatch[1];
    var unit = agoMatch[2];
    return dayjs().subtract(amount, unit);
  }

  if (TIME_NOW_STRING.test(dateString)) {
    return dayjs();
  }

  var stringHasTimezone = hasTimezoneInfo(dateString);
  var cleanedDateString = stripTimezoneAbbr(dateString);
  var native;

  // The string names its own zone: trust the native parser if it succeeds.
  if (stringHasTimezone) {
    native = new Date(dateString);
    if (!_Number$isNaN(native.getTime())) {
      return dayjs(native);
    }
  }

  // A zone was supplied and the string has none: interpret the string in it.
  if (timezone && !stringHasTimezone) {
    if (format) {
      var zoneFormat = stripTimezoneFromFormat(format);
      try {
        var zoned = dayjs.tz(cleanedDateString, zoneFormat, timezone);
        if (zoned.isValid()) {
          return zoned;
        }
      } catch (_unused) {
        // Fall through
      }
    }

    native = new Date(cleanedDateString);
    if (!_Number$isNaN(native.getTime())) {
      return dayjs(native).tz(timezone, true);
    }

    var loose = dayjs(cleanedDateString);
    // dayjs(null) is the explicit "could not parse" result for this branch.
    return loose.isValid() ? loose.tz(timezone, true) : dayjs(null);
  }

  // No usable zone: try the explicit format first.
  if (format) {
    var formatted = dayjs(cleanedDateString, stripTimezoneFromFormat(format));
    if (formatted.isValid()) {
      return formatted;
    }
  }

  // Last resort: native parser, then dayjs's own parser.
  native = new Date(cleanedDateString);
  return _Number$isNaN(native.getTime()) ? dayjs(cleanedDateString) : dayjs(native);
}
|
|
7914
8051
|
|
|
7915
8052
|
// Take a date published string, and hopefully return a date out of
|
|
@@ -7920,10 +8057,10 @@ function requireMercury() {
|
|
|
7920
8057
|
format = _ref.format;
|
|
7921
8058
|
// If string is in milliseconds or seconds, convert to int and return
|
|
7922
8059
|
if (MS_DATE_STRING.test(dateString)) {
|
|
7923
|
-
return new Date(
|
|
8060
|
+
return new Date(_parseInt$1(dateString, 10)).toISOString();
|
|
7924
8061
|
}
|
|
7925
8062
|
if (SEC_DATE_STRING.test(dateString)) {
|
|
7926
|
-
return new Date(
|
|
8063
|
+
return new Date(_parseInt$1(dateString, 10) * 1000).toISOString();
|
|
7927
8064
|
}
|
|
7928
8065
|
var date = createDate(dateString, timezone, format);
|
|
7929
8066
|
if (!date.isValid()) {
|
|
@@ -7997,13 +8134,13 @@ function requireMercury() {
|
|
|
7997
8134
|
acc[titleText] = acc[titleText] ? acc[titleText] + 1 : 1;
|
|
7998
8135
|
return acc;
|
|
7999
8136
|
}, {});
|
|
8000
|
-
var _Reflect$ownKeys$redu = _Reflect$
|
|
8137
|
+
var _Reflect$ownKeys$redu = _Reflect$ownKeys$1(termCounts).reduce(function (acc, key) {
|
|
8001
8138
|
if (acc[1] < termCounts[key]) {
|
|
8002
8139
|
return [key, termCounts[key]];
|
|
8003
8140
|
}
|
|
8004
8141
|
return acc;
|
|
8005
8142
|
}, [0, 0]),
|
|
8006
|
-
_Reflect$ownKeys$redu2 =
|
|
8143
|
+
_Reflect$ownKeys$redu2 = _slicedToArray$1(_Reflect$ownKeys$redu, 2),
|
|
8007
8144
|
maxTerm = _Reflect$ownKeys$redu2[0],
|
|
8008
8145
|
termCount = _Reflect$ownKeys$redu2[1];
|
|
8009
8146
|
|
|
@@ -8032,16 +8169,16 @@ function requireMercury() {
|
|
|
8032
8169
|
//
|
|
8033
8170
|
// Strip out the big TLDs - it just makes the matching a bit more
|
|
8034
8171
|
// accurate. Not the end of the world if it doesn't strip right.
|
|
8035
|
-
var _URL$parse =
|
|
8172
|
+
var _URL$parse = URL$1$1.parse(url),
|
|
8036
8173
|
host = _URL$parse.host;
|
|
8037
8174
|
var nakedDomain = host.replace(DOMAIN_ENDINGS_RE, '');
|
|
8038
8175
|
var startSlug = splitTitle[0].toLowerCase().replace(' ', '');
|
|
8039
|
-
var startSlugRatio =
|
|
8176
|
+
var startSlugRatio = wuzzy.levenshtein(startSlug, nakedDomain);
|
|
8040
8177
|
if (startSlugRatio > 0.4 && startSlug.length > 5) {
|
|
8041
8178
|
return splitTitle.slice(2).join('');
|
|
8042
8179
|
}
|
|
8043
8180
|
var endSlug = splitTitle.slice(-1)[0].toLowerCase().replace(' ', '');
|
|
8044
|
-
var endSlugRatio =
|
|
8181
|
+
var endSlugRatio = wuzzy.levenshtein(endSlug, nakedDomain);
|
|
8045
8182
|
if (endSlugRatio > 0.4 && endSlug.length >= 5) {
|
|
8046
8183
|
return splitTitle.slice(0, -2).join('');
|
|
8047
8184
|
}
|
|
@@ -8138,7 +8275,7 @@ function requireMercury() {
|
|
|
8138
8275
|
// First, look for special hNews based selectors and give them a big
|
|
8139
8276
|
// boost, if they exist
|
|
8140
8277
|
HNEWS_CONTENT_SELECTORS.forEach(function (_ref) {
|
|
8141
|
-
var _ref2 =
|
|
8278
|
+
var _ref2 = _slicedToArray$1(_ref, 2),
|
|
8142
8279
|
parentSelector = _ref2[0],
|
|
8143
8280
|
childSelector = _ref2[1];
|
|
8144
8281
|
$("".concat(parentSelector, " ").concat(childSelector)).each(function (index, node) {
|
|
@@ -8270,9 +8407,9 @@ function requireMercury() {
|
|
|
8270
8407
|
return $topCandidate;
|
|
8271
8408
|
}
|
|
8272
8409
|
function _createForOfIteratorHelper$2(r, e) {
|
|
8273
|
-
var t = "undefined" != typeof
|
|
8410
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
8274
8411
|
if (!t) {
|
|
8275
|
-
if (_Array$
|
|
8412
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$2(r)) || e) {
|
|
8276
8413
|
t && (r = t);
|
|
8277
8414
|
var _n = 0,
|
|
8278
8415
|
F = function F() {};
|
|
@@ -8280,9 +8417,9 @@ function requireMercury() {
|
|
|
8280
8417
|
s: F,
|
|
8281
8418
|
n: function n() {
|
|
8282
8419
|
return _n >= r.length ? {
|
|
8283
|
-
done:
|
|
8420
|
+
done: true
|
|
8284
8421
|
} : {
|
|
8285
|
-
done:
|
|
8422
|
+
done: false,
|
|
8286
8423
|
value: r[_n++]
|
|
8287
8424
|
};
|
|
8288
8425
|
},
|
|
@@ -8295,8 +8432,8 @@ function requireMercury() {
|
|
|
8295
8432
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
8296
8433
|
}
|
|
8297
8434
|
var o,
|
|
8298
|
-
a =
|
|
8299
|
-
u =
|
|
8435
|
+
a = true,
|
|
8436
|
+
u = false;
|
|
8300
8437
|
return {
|
|
8301
8438
|
s: function s() {
|
|
8302
8439
|
t = t.call(r);
|
|
@@ -8306,7 +8443,7 @@ function requireMercury() {
|
|
|
8306
8443
|
return a = r.done, r;
|
|
8307
8444
|
},
|
|
8308
8445
|
e: function e(r) {
|
|
8309
|
-
u =
|
|
8446
|
+
u = true, o = r;
|
|
8310
8447
|
},
|
|
8311
8448
|
f: function f() {
|
|
8312
8449
|
try {
|
|
@@ -8321,7 +8458,7 @@ function requireMercury() {
|
|
|
8321
8458
|
if (r) {
|
|
8322
8459
|
if ("string" == typeof r) return _arrayLikeToArray$2(r, a);
|
|
8323
8460
|
var t = {}.toString.call(r).slice(8, -1);
|
|
8324
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
8461
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$2(r, a) : void 0;
|
|
8325
8462
|
}
|
|
8326
8463
|
}
|
|
8327
8464
|
function _arrayLikeToArray$2(r, a) {
|
|
@@ -8330,11 +8467,11 @@ function requireMercury() {
|
|
|
8330
8467
|
return n;
|
|
8331
8468
|
}
|
|
8332
8469
|
function ownKeys$4(e, r) {
|
|
8333
|
-
var t = _Object$
|
|
8334
|
-
if (_Object$
|
|
8335
|
-
var o = _Object$
|
|
8470
|
+
var t = _Object$keys$1(e);
|
|
8471
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
8472
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
8336
8473
|
r && (o = o.filter(function (r) {
|
|
8337
|
-
return _Object$
|
|
8474
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
8338
8475
|
})), t.push.apply(t, o);
|
|
8339
8476
|
}
|
|
8340
8477
|
return t;
|
|
@@ -8342,10 +8479,10 @@ function requireMercury() {
|
|
|
8342
8479
|
function _objectSpread$4(e) {
|
|
8343
8480
|
for (var r = 1; r < arguments.length; r++) {
|
|
8344
8481
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
8345
|
-
r % 2 ? ownKeys$4(Object(t),
|
|
8346
|
-
|
|
8347
|
-
}) : _Object$
|
|
8348
|
-
_Object$
|
|
8482
|
+
r % 2 ? ownKeys$4(Object(t), true).forEach(function (r) {
|
|
8483
|
+
_defineProperty$1(e, r, t[r]);
|
|
8484
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$4(Object(t)).forEach(function (r) {
|
|
8485
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
8349
8486
|
});
|
|
8350
8487
|
}
|
|
8351
8488
|
return e;
|
|
@@ -8392,8 +8529,7 @@ function requireMercury() {
|
|
|
8392
8529
|
|
|
8393
8530
|
// We didn't succeed on first pass, one by one disable our
|
|
8394
8531
|
// extraction opts and try again.
|
|
8395
|
-
|
|
8396
|
-
var _iterator = _createForOfIteratorHelper$2(_Reflect$ownKeys__default$1["default"](opts).filter(function (k) {
|
|
8532
|
+
var _iterator = _createForOfIteratorHelper$2(_Reflect$ownKeys$1(opts).filter(function (k) {
|
|
8397
8533
|
return opts[k] === true;
|
|
8398
8534
|
})),
|
|
8399
8535
|
_step;
|
|
@@ -8520,9 +8656,9 @@ function requireMercury() {
|
|
|
8520
8656
|
var bylineRe = /^[\n\s]*By/i;
|
|
8521
8657
|
var BYLINE_SELECTORS_RE = [['#byline', bylineRe], ['.byline', bylineRe]];
|
|
8522
8658
|
function _createForOfIteratorHelper$1(r, e) {
|
|
8523
|
-
var t = "undefined" != typeof
|
|
8659
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
8524
8660
|
if (!t) {
|
|
8525
|
-
if (_Array$
|
|
8661
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray$1(r)) || e) {
|
|
8526
8662
|
t && (r = t);
|
|
8527
8663
|
var _n = 0,
|
|
8528
8664
|
F = function F() {};
|
|
@@ -8530,9 +8666,9 @@ function requireMercury() {
|
|
|
8530
8666
|
s: F,
|
|
8531
8667
|
n: function n() {
|
|
8532
8668
|
return _n >= r.length ? {
|
|
8533
|
-
done:
|
|
8669
|
+
done: true
|
|
8534
8670
|
} : {
|
|
8535
|
-
done:
|
|
8671
|
+
done: false,
|
|
8536
8672
|
value: r[_n++]
|
|
8537
8673
|
};
|
|
8538
8674
|
},
|
|
@@ -8545,8 +8681,8 @@ function requireMercury() {
|
|
|
8545
8681
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
8546
8682
|
}
|
|
8547
8683
|
var o,
|
|
8548
|
-
a =
|
|
8549
|
-
u =
|
|
8684
|
+
a = true,
|
|
8685
|
+
u = false;
|
|
8550
8686
|
return {
|
|
8551
8687
|
s: function s() {
|
|
8552
8688
|
t = t.call(r);
|
|
@@ -8556,7 +8692,7 @@ function requireMercury() {
|
|
|
8556
8692
|
return a = r.done, r;
|
|
8557
8693
|
},
|
|
8558
8694
|
e: function e(r) {
|
|
8559
|
-
u =
|
|
8695
|
+
u = true, o = r;
|
|
8560
8696
|
},
|
|
8561
8697
|
f: function f() {
|
|
8562
8698
|
try {
|
|
@@ -8571,7 +8707,7 @@ function requireMercury() {
|
|
|
8571
8707
|
if (r) {
|
|
8572
8708
|
if ("string" == typeof r) return _arrayLikeToArray$1(r, a);
|
|
8573
8709
|
var t = {}.toString.call(r).slice(8, -1);
|
|
8574
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
8710
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray$1(r, a) : void 0;
|
|
8575
8711
|
}
|
|
8576
8712
|
}
|
|
8577
8713
|
function _arrayLikeToArray$1(r, a) {
|
|
@@ -8600,12 +8736,11 @@ function requireMercury() {
|
|
|
8600
8736
|
|
|
8601
8737
|
// Last, use our looser regular-expression based selectors for
|
|
8602
8738
|
// potential authors.
|
|
8603
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
8604
8739
|
var _iterator = _createForOfIteratorHelper$1(BYLINE_SELECTORS_RE),
|
|
8605
8740
|
_step;
|
|
8606
8741
|
try {
|
|
8607
8742
|
for (_iterator.s(); !(_step = _iterator.n()).done;) {
|
|
8608
|
-
var _step$value =
|
|
8743
|
+
var _step$value = _slicedToArray$1(_step.value, 2),
|
|
8609
8744
|
selector = _step$value[0],
|
|
8610
8745
|
regex = _step$value[1];
|
|
8611
8746
|
var node = $(selector);
|
|
@@ -8760,8 +8895,8 @@ function requireMercury() {
|
|
|
8760
8895
|
}
|
|
8761
8896
|
function scoreByDimensions($img) {
|
|
8762
8897
|
var score = 0;
|
|
8763
|
-
var width =
|
|
8764
|
-
var height =
|
|
8898
|
+
var width = _parseFloat$1($img.attr('width'));
|
|
8899
|
+
var height = _parseFloat$1($img.attr('height'));
|
|
8765
8900
|
var src = $img.attr('src');
|
|
8766
8901
|
|
|
8767
8902
|
// Penalty for skinny images
|
|
@@ -8788,9 +8923,9 @@ function requireMercury() {
|
|
|
8788
8923
|
return $imgs.length / 2 - index;
|
|
8789
8924
|
}
|
|
8790
8925
|
function _createForOfIteratorHelper(r, e) {
|
|
8791
|
-
var t = "undefined" != typeof
|
|
8926
|
+
var t = "undefined" != typeof _Symbol$1 && r[_Symbol$iterator$1] || r["@@iterator"];
|
|
8792
8927
|
if (!t) {
|
|
8793
|
-
if (_Array$
|
|
8928
|
+
if (_Array$isArray$1(r) || (t = _unsupportedIterableToArray(r)) || e) {
|
|
8794
8929
|
t && (r = t);
|
|
8795
8930
|
var _n = 0,
|
|
8796
8931
|
F = function F() {};
|
|
@@ -8798,9 +8933,9 @@ function requireMercury() {
|
|
|
8798
8933
|
s: F,
|
|
8799
8934
|
n: function n() {
|
|
8800
8935
|
return _n >= r.length ? {
|
|
8801
|
-
done:
|
|
8936
|
+
done: true
|
|
8802
8937
|
} : {
|
|
8803
|
-
done:
|
|
8938
|
+
done: false,
|
|
8804
8939
|
value: r[_n++]
|
|
8805
8940
|
};
|
|
8806
8941
|
},
|
|
@@ -8813,8 +8948,8 @@ function requireMercury() {
|
|
|
8813
8948
|
throw new TypeError("Invalid attempt to iterate non-iterable instance.\nIn order to be iterable, non-array objects must have a [Symbol.iterator]() method.");
|
|
8814
8949
|
}
|
|
8815
8950
|
var o,
|
|
8816
|
-
a =
|
|
8817
|
-
u =
|
|
8951
|
+
a = true,
|
|
8952
|
+
u = false;
|
|
8818
8953
|
return {
|
|
8819
8954
|
s: function s() {
|
|
8820
8955
|
t = t.call(r);
|
|
@@ -8824,7 +8959,7 @@ function requireMercury() {
|
|
|
8824
8959
|
return a = r.done, r;
|
|
8825
8960
|
},
|
|
8826
8961
|
e: function e(r) {
|
|
8827
|
-
u =
|
|
8962
|
+
u = true, o = r;
|
|
8828
8963
|
},
|
|
8829
8964
|
f: function f() {
|
|
8830
8965
|
try {
|
|
@@ -8839,7 +8974,7 @@ function requireMercury() {
|
|
|
8839
8974
|
if (r) {
|
|
8840
8975
|
if ("string" == typeof r) return _arrayLikeToArray(r, a);
|
|
8841
8976
|
var t = {}.toString.call(r).slice(8, -1);
|
|
8842
|
-
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$
|
|
8977
|
+
return "Object" === t && r.constructor && (t = r.constructor.name), "Map" === t || "Set" === t ? _Array$from$1(r) : "Arguments" === t || /^(?:Ui|I)nt(?:8|16|32)(?:Clamped)?Array$/.test(t) ? _arrayLikeToArray(r, a) : void 0;
|
|
8843
8978
|
}
|
|
8844
8979
|
}
|
|
8845
8980
|
function _arrayLikeToArray(r, a) {
|
|
@@ -8895,10 +9030,10 @@ function requireMercury() {
|
|
|
8895
9030
|
score += scoreByPosition(imgs, index);
|
|
8896
9031
|
imgScores[src] = score;
|
|
8897
9032
|
});
|
|
8898
|
-
var _Reflect$ownKeys$redu = _Reflect$
|
|
9033
|
+
var _Reflect$ownKeys$redu = _Reflect$ownKeys$1(imgScores).reduce(function (acc, key) {
|
|
8899
9034
|
return imgScores[key] > acc[1] ? [key, imgScores[key]] : acc;
|
|
8900
9035
|
}, [null, 0]),
|
|
8901
|
-
_Reflect$ownKeys$redu2 =
|
|
9036
|
+
_Reflect$ownKeys$redu2 = _slicedToArray$1(_Reflect$ownKeys$redu, 2),
|
|
8902
9037
|
topUrl = _Reflect$ownKeys$redu2[0],
|
|
8903
9038
|
topScore = _Reflect$ownKeys$redu2[1];
|
|
8904
9039
|
if (topScore > 0) {
|
|
@@ -8908,7 +9043,6 @@ function requireMercury() {
|
|
|
8908
9043
|
|
|
8909
9044
|
// If nothing else worked, check to see if there are any really
|
|
8910
9045
|
// probable nodes in the doc, like <link rel="image_src" />.
|
|
8911
|
-
// eslint-disable-next-line no-restricted-syntax
|
|
8912
9046
|
var _iterator = _createForOfIteratorHelper(LEAD_IMAGE_URL_SELECTORS),
|
|
8913
9047
|
_step;
|
|
8914
9048
|
try {
|
|
@@ -8946,7 +9080,7 @@ function requireMercury() {
|
|
|
8946
9080
|
// sliding scale, subtract points from this link based on
|
|
8947
9081
|
// similarity.
|
|
8948
9082
|
if (score > 0) {
|
|
8949
|
-
var similarity = new
|
|
9083
|
+
var similarity = new difflib.SequenceMatcher(null, articleUrl, href).ratio();
|
|
8950
9084
|
// Subtract .1 from diff_percent when calculating modifier,
|
|
8951
9085
|
// which means that if it's less than 10% different, we give a
|
|
8952
9086
|
// bonus instead. Ex:
|
|
@@ -8966,7 +9100,7 @@ function requireMercury() {
|
|
|
8966
9100
|
// get scored, and sorted properly by score.
|
|
8967
9101
|
var score = 0;
|
|
8968
9102
|
if (IS_DIGIT_RE.test(linkText.trim())) {
|
|
8969
|
-
var linkTextAsNum =
|
|
9103
|
+
var linkTextAsNum = _parseInt$1(linkText, 10);
|
|
8970
9104
|
// If it's the first page, we already got it on the first call.
|
|
8971
9105
|
// Give it a negative score. Otherwise, up to page 10, give a
|
|
8972
9106
|
// small bonus.
|
|
@@ -9031,7 +9165,7 @@ function requireMercury() {
|
|
|
9031
9165
|
var positiveMatch = false;
|
|
9032
9166
|
var negativeMatch = false;
|
|
9033
9167
|
var score = 0;
|
|
9034
|
-
_Array$
|
|
9168
|
+
_Array$from$1(range(0, 4)).forEach(function () {
|
|
9035
9169
|
if ($parent.length === 0) {
|
|
9036
9170
|
return;
|
|
9037
9171
|
}
|
|
@@ -9079,7 +9213,7 @@ function requireMercury() {
|
|
|
9079
9213
|
return false;
|
|
9080
9214
|
}
|
|
9081
9215
|
var hostname = parsedUrl.hostname;
|
|
9082
|
-
var _URL$parse =
|
|
9216
|
+
var _URL$parse = URL$1$1.parse(href),
|
|
9083
9217
|
linkHost = _URL$parse.hostname;
|
|
9084
9218
|
|
|
9085
9219
|
// Domain mismatch.
|
|
@@ -9150,7 +9284,7 @@ function requireMercury() {
|
|
|
9150
9284
|
$ = _ref.$,
|
|
9151
9285
|
_ref$previousUrls = _ref.previousUrls,
|
|
9152
9286
|
previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
|
|
9153
|
-
parsedUrl = parsedUrl ||
|
|
9287
|
+
parsedUrl = parsedUrl || URL$1$1.parse(articleUrl);
|
|
9154
9288
|
var baseRegex = makeBaseRegex(baseUrl);
|
|
9155
9289
|
var isWp = isWordpress($);
|
|
9156
9290
|
|
|
@@ -9201,7 +9335,7 @@ function requireMercury() {
|
|
|
9201
9335
|
possiblePage.score = score;
|
|
9202
9336
|
return possiblePages;
|
|
9203
9337
|
}, {});
|
|
9204
|
-
return _Reflect$
|
|
9338
|
+
return _Reflect$ownKeys$1(scoredPages).length === 0 ? null : scoredPages;
|
|
9205
9339
|
}
|
|
9206
9340
|
|
|
9207
9341
|
// Looks for and returns next page url
|
|
@@ -9213,7 +9347,7 @@ function requireMercury() {
|
|
|
9213
9347
|
parsedUrl = _ref.parsedUrl,
|
|
9214
9348
|
_ref$previousUrls = _ref.previousUrls,
|
|
9215
9349
|
previousUrls = _ref$previousUrls === void 0 ? [] : _ref$previousUrls;
|
|
9216
|
-
parsedUrl = parsedUrl ||
|
|
9350
|
+
parsedUrl = parsedUrl || URL$1$1.parse(url);
|
|
9217
9351
|
var articleUrl = removeAnchor(url);
|
|
9218
9352
|
var baseUrl = articleBaseUrl(url, parsedUrl);
|
|
9219
9353
|
var links = $('a[href]').toArray();
|
|
@@ -9231,7 +9365,7 @@ function requireMercury() {
|
|
|
9231
9365
|
|
|
9232
9366
|
// now that we've scored all possible pages,
|
|
9233
9367
|
// find the biggest one.
|
|
9234
|
-
var topPage = _Reflect$
|
|
9368
|
+
var topPage = _Reflect$ownKeys$1(scoredLinks).reduce(function (acc, link) {
|
|
9235
9369
|
var scoredLink = scoredLinks[link];
|
|
9236
9370
|
return scoredLink.score > acc.score ? scoredLink : acc;
|
|
9237
9371
|
}, {
|
|
@@ -9248,7 +9382,7 @@ function requireMercury() {
|
|
|
9248
9382
|
};
|
|
9249
9383
|
var CANONICAL_META_SELECTORS = ['og:url'];
|
|
9250
9384
|
function parseDomain(url) {
|
|
9251
|
-
var parsedUrl =
|
|
9385
|
+
var parsedUrl = URL$1$1.parse(url);
|
|
9252
9386
|
var hostname = parsedUrl.hostname;
|
|
9253
9387
|
return hostname;
|
|
9254
9388
|
}
|
|
@@ -9277,11 +9411,61 @@ function requireMercury() {
|
|
|
9277
9411
|
return result(url);
|
|
9278
9412
|
}
|
|
9279
9413
|
};
|
|
9414
|
+
var defaults = {
|
|
9415
|
+
ellipse: '…',
|
|
9416
|
+
chars: [' ', '-'],
|
|
9417
|
+
max: 140,
|
|
9418
|
+
truncate: true
|
|
9419
|
+
};
|
|
9420
|
+
function ellipsizeMiddle(str, max, ellipse, chars) {
|
|
9421
|
+
if (str.length <= max) return str;
|
|
9422
|
+
if (max < 2) return str.slice(0, max - ellipse.length) + ellipse;
|
|
9423
|
+
var maxLen = max - ellipse.length;
|
|
9424
|
+
var middle = Math.floor(maxLen / 2);
|
|
9425
|
+
var left = middle;
|
|
9426
|
+
var right = str.length - middle;
|
|
9427
|
+
for (var i = 0; i < middle; i += 1) {
|
|
9428
|
+
var charLeft = str.charAt(i);
|
|
9429
|
+
var posRight = str.length - i;
|
|
9430
|
+
var charRight = str.charAt(posRight);
|
|
9431
|
+
if (chars.indexOf(charLeft) !== -1) left = i;
|
|
9432
|
+
if (chars.indexOf(charRight) !== -1) right = posRight;
|
|
9433
|
+
}
|
|
9434
|
+
return str.slice(0, left) + ellipse + str.slice(right);
|
|
9435
|
+
}
|
|
9436
|
+
function ellipsize(str, max, ellipse, chars, truncate) {
|
|
9437
|
+
if (str.length <= max) return str;
|
|
9438
|
+
var maxLen = max - ellipse.length;
|
|
9439
|
+
var end = maxLen;
|
|
9440
|
+
var breakpointFound = false;
|
|
9441
|
+
for (var i = 0; i <= maxLen; i += 1) {
|
|
9442
|
+
var _char = str.charAt(i);
|
|
9443
|
+
if (chars.indexOf(_char) !== -1) {
|
|
9444
|
+
end = i;
|
|
9445
|
+
breakpointFound = true;
|
|
9446
|
+
}
|
|
9447
|
+
}
|
|
9448
|
+
if (!truncate && !breakpointFound) return '';
|
|
9449
|
+
return str.slice(0, end) + ellipse;
|
|
9450
|
+
}
|
|
9451
|
+
var ellipsize$1 = function ellipsize$1(str, max, opts) {
|
|
9452
|
+
if (typeof str !== 'string' || str.length === 0) return '';
|
|
9453
|
+
if (max === 0) return '';
|
|
9454
|
+
opts = opts || {};
|
|
9455
|
+
_Object$keys$1(defaults).forEach(function (key) {
|
|
9456
|
+
if (opts[key] === null || typeof opts[key] === 'undefined') {
|
|
9457
|
+
opts[key] = defaults[key];
|
|
9458
|
+
}
|
|
9459
|
+
});
|
|
9460
|
+
opts.max = max || opts.max;
|
|
9461
|
+
if (opts.truncate === 'middle') return ellipsizeMiddle(str, opts.max, opts.ellipse, opts.chars);
|
|
9462
|
+
return ellipsize(str, opts.max, opts.ellipse, opts.chars, opts.truncate);
|
|
9463
|
+
};
|
|
9280
9464
|
var EXCERPT_META_SELECTORS = ['og:description', 'twitter:description'];
|
|
9281
9465
|
function clean(content, $) {
|
|
9282
9466
|
var maxLength = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : 200;
|
|
9283
9467
|
content = content.replace(/[\s\n]+/g, ' ').trim();
|
|
9284
|
-
return
|
|
9468
|
+
return ellipsize$1(content, maxLength, {
|
|
9285
9469
|
ellipse: '…'
|
|
9286
9470
|
});
|
|
9287
9471
|
}
|
|
@@ -9321,11 +9505,11 @@ function requireMercury() {
|
|
|
9321
9505
|
}
|
|
9322
9506
|
};
|
|
9323
9507
|
function ownKeys$3(e, r) {
|
|
9324
|
-
var t = _Object$
|
|
9325
|
-
if (_Object$
|
|
9326
|
-
var o = _Object$
|
|
9508
|
+
var t = _Object$keys$1(e);
|
|
9509
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9510
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9327
9511
|
r && (o = o.filter(function (r) {
|
|
9328
|
-
return _Object$
|
|
9512
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9329
9513
|
})), t.push.apply(t, o);
|
|
9330
9514
|
}
|
|
9331
9515
|
return t;
|
|
@@ -9333,10 +9517,10 @@ function requireMercury() {
|
|
|
9333
9517
|
function _objectSpread$3(e) {
|
|
9334
9518
|
for (var r = 1; r < arguments.length; r++) {
|
|
9335
9519
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9336
|
-
r % 2 ? ownKeys$3(Object(t),
|
|
9337
|
-
|
|
9338
|
-
}) : _Object$
|
|
9339
|
-
_Object$
|
|
9520
|
+
r % 2 ? ownKeys$3(Object(t), true).forEach(function (r) {
|
|
9521
|
+
_defineProperty$1(e, r, t[r]);
|
|
9522
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$3(Object(t)).forEach(function (r) {
|
|
9523
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9340
9524
|
});
|
|
9341
9525
|
}
|
|
9342
9526
|
return e;
|
|
@@ -9356,7 +9540,7 @@ function requireMercury() {
|
|
|
9356
9540
|
word_count: GenericWordCountExtractor.extract,
|
|
9357
9541
|
direction: function direction(_ref) {
|
|
9358
9542
|
var title = _ref.title;
|
|
9359
|
-
return
|
|
9543
|
+
return stringDirection.getDirection(title);
|
|
9360
9544
|
},
|
|
9361
9545
|
extract: function extract(options) {
|
|
9362
9546
|
var html = options.html,
|
|
@@ -9411,24 +9595,24 @@ function requireMercury() {
|
|
|
9411
9595
|
'meta[name="generator"][value="blogger"]': BloggerExtractor
|
|
9412
9596
|
};
|
|
9413
9597
|
function detectByHtml($) {
|
|
9414
|
-
var selector = _Reflect$
|
|
9598
|
+
var selector = _Reflect$ownKeys$1(Detectors).find(function (s) {
|
|
9415
9599
|
return $(s).length > 0;
|
|
9416
9600
|
});
|
|
9417
9601
|
return Detectors[selector];
|
|
9418
9602
|
}
|
|
9419
9603
|
function getExtractor(url, parsedUrl, $) {
|
|
9420
|
-
parsedUrl = parsedUrl ||
|
|
9604
|
+
parsedUrl = parsedUrl || URL$1$1.parse(url);
|
|
9421
9605
|
var _parsedUrl = parsedUrl,
|
|
9422
9606
|
hostname = _parsedUrl.hostname;
|
|
9423
9607
|
var baseDomain = hostname.split('.').slice(-2).join('.');
|
|
9424
9608
|
return apiExtractors[hostname] || apiExtractors[baseDomain] || Extractors[hostname] || Extractors[baseDomain] || detectByHtml($) || GenericExtractor;
|
|
9425
9609
|
}
|
|
9426
9610
|
function ownKeys$2(e, r) {
|
|
9427
|
-
var t = _Object$
|
|
9428
|
-
if (_Object$
|
|
9429
|
-
var o = _Object$
|
|
9611
|
+
var t = _Object$keys$1(e);
|
|
9612
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9613
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9430
9614
|
r && (o = o.filter(function (r) {
|
|
9431
|
-
return _Object$
|
|
9615
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9432
9616
|
})), t.push.apply(t, o);
|
|
9433
9617
|
}
|
|
9434
9618
|
return t;
|
|
@@ -9436,10 +9620,10 @@ function requireMercury() {
|
|
|
9436
9620
|
function _objectSpread$2(e) {
|
|
9437
9621
|
for (var r = 1; r < arguments.length; r++) {
|
|
9438
9622
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9439
|
-
r % 2 ? ownKeys$2(Object(t),
|
|
9440
|
-
|
|
9441
|
-
}) : _Object$
|
|
9442
|
-
_Object$
|
|
9623
|
+
r % 2 ? ownKeys$2(Object(t), true).forEach(function (r) {
|
|
9624
|
+
_defineProperty$1(e, r, t[r]);
|
|
9625
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$2(Object(t)).forEach(function (r) {
|
|
9626
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9443
9627
|
});
|
|
9444
9628
|
}
|
|
9445
9629
|
return e;
|
|
@@ -9457,7 +9641,7 @@ function requireMercury() {
|
|
|
9457
9641
|
function transformElements($content, $, _ref2) {
|
|
9458
9642
|
var transforms = _ref2.transforms;
|
|
9459
9643
|
if (!transforms) return $content;
|
|
9460
|
-
_Reflect$
|
|
9644
|
+
_Reflect$ownKeys$1(transforms).forEach(function (key) {
|
|
9461
9645
|
var $matches = $(key, $content);
|
|
9462
9646
|
var value = transforms[key];
|
|
9463
9647
|
|
|
@@ -9481,13 +9665,13 @@ function requireMercury() {
|
|
|
9481
9665
|
}
|
|
9482
9666
|
function findMatchingSelector($, selectors, extractHtml, allowMultiple) {
|
|
9483
9667
|
return selectors.find(function (selector) {
|
|
9484
|
-
if (_Array$
|
|
9668
|
+
if (_Array$isArray$1(selector)) {
|
|
9485
9669
|
if (extractHtml) {
|
|
9486
9670
|
return selector.reduce(function (acc, s) {
|
|
9487
9671
|
return acc && $(s).length > 0;
|
|
9488
9672
|
}, true);
|
|
9489
9673
|
}
|
|
9490
|
-
var _selector =
|
|
9674
|
+
var _selector = _slicedToArray$1(selector, 2),
|
|
9491
9675
|
s = _selector[0],
|
|
9492
9676
|
attr = _selector[1];
|
|
9493
9677
|
return (allowMultiple || !allowMultiple && $(s).length === 1) && $(s).attr(attr) && $(s).attr(attr).trim() !== '';
|
|
@@ -9529,7 +9713,7 @@ function requireMercury() {
|
|
|
9529
9713
|
// multi-match selection, which allows the parser to choose several
|
|
9530
9714
|
// selectors to include in the result. Note that all selectors in the
|
|
9531
9715
|
// array must match in order for this selector to trigger
|
|
9532
|
-
if (_Array$
|
|
9716
|
+
if (_Array$isArray$1(matchingSelector)) {
|
|
9533
9717
|
$content = $(matchingSelector.join(','));
|
|
9534
9718
|
var $wrapper = $('<div></div>');
|
|
9535
9719
|
$content.each(function (_, element) {
|
|
@@ -9563,8 +9747,8 @@ function requireMercury() {
|
|
|
9563
9747
|
var result;
|
|
9564
9748
|
// if selector is an array (e.g., ['img', 'src']),
|
|
9565
9749
|
// extract the attr
|
|
9566
|
-
if (_Array$
|
|
9567
|
-
var _matchingSelector =
|
|
9750
|
+
if (_Array$isArray$1(matchingSelector)) {
|
|
9751
|
+
var _matchingSelector = _slicedToArray$1(matchingSelector, 3),
|
|
9568
9752
|
selector = _matchingSelector[0],
|
|
9569
9753
|
attr = _matchingSelector[1],
|
|
9570
9754
|
transform = _matchingSelector[2];
|
|
@@ -9581,7 +9765,7 @@ function requireMercury() {
|
|
|
9581
9765
|
return $(el).text().trim();
|
|
9582
9766
|
});
|
|
9583
9767
|
}
|
|
9584
|
-
result = _Array$
|
|
9768
|
+
result = _Array$isArray$1(result.toArray()) && allowMultiple ? result.toArray() : result[0];
|
|
9585
9769
|
// Allow custom extractor to skip default cleaner
|
|
9586
9770
|
// for this type; defaults to true
|
|
9587
9771
|
if (defaultCleaner && Cleaners[type]) {
|
|
@@ -9591,7 +9775,7 @@ function requireMercury() {
|
|
|
9591
9775
|
}
|
|
9592
9776
|
function selectExtendedTypes(extend, opts) {
|
|
9593
9777
|
var results = {};
|
|
9594
|
-
_Reflect$
|
|
9778
|
+
_Reflect$ownKeys$1(extend).forEach(function (t) {
|
|
9595
9779
|
if (!results[t]) {
|
|
9596
9780
|
results[t] = select(_objectSpread$2(_objectSpread$2({}, opts), {}, {
|
|
9597
9781
|
type: t,
|
|
@@ -9709,11 +9893,11 @@ function requireMercury() {
|
|
|
9709
9893
|
}
|
|
9710
9894
|
};
|
|
9711
9895
|
function ownKeys$1(e, r) {
|
|
9712
|
-
var t = _Object$
|
|
9713
|
-
if (_Object$
|
|
9714
|
-
var o = _Object$
|
|
9896
|
+
var t = _Object$keys$1(e);
|
|
9897
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9898
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9715
9899
|
r && (o = o.filter(function (r) {
|
|
9716
|
-
return _Object$
|
|
9900
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9717
9901
|
})), t.push.apply(t, o);
|
|
9718
9902
|
}
|
|
9719
9903
|
return t;
|
|
@@ -9721,10 +9905,10 @@ function requireMercury() {
|
|
|
9721
9905
|
function _objectSpread$1(e) {
|
|
9722
9906
|
for (var r = 1; r < arguments.length; r++) {
|
|
9723
9907
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9724
|
-
r % 2 ? ownKeys$1(Object(t),
|
|
9725
|
-
|
|
9726
|
-
}) : _Object$
|
|
9727
|
-
_Object$
|
|
9908
|
+
r % 2 ? ownKeys$1(Object(t), true).forEach(function (r) {
|
|
9909
|
+
_defineProperty$1(e, r, t[r]);
|
|
9910
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys$1(Object(t)).forEach(function (r) {
|
|
9911
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9728
9912
|
});
|
|
9729
9913
|
}
|
|
9730
9914
|
return e;
|
|
@@ -9733,9 +9917,9 @@ function requireMercury() {
|
|
|
9733
9917
|
return _collectAllPages.apply(this, arguments);
|
|
9734
9918
|
}
|
|
9735
9919
|
function _collectAllPages() {
|
|
9736
|
-
_collectAllPages =
|
|
9920
|
+
_collectAllPages = _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee(_ref) {
|
|
9737
9921
|
var next_page_url, html, $, metaCache, result, Extractor, title, url, pages, previousUrls, extractorOpts, nextPageResult, word_count;
|
|
9738
|
-
return
|
|
9922
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
9739
9923
|
while (1) switch (_context.prev = _context.next) {
|
|
9740
9924
|
case 0:
|
|
9741
9925
|
next_page_url = _ref.next_page_url, html = _ref.html, $ = _ref.$, metaCache = _ref.metaCache, result = _ref.result, Extractor = _ref.Extractor, title = _ref.title, url = _ref.url;
|
|
@@ -9750,7 +9934,6 @@ function requireMercury() {
|
|
|
9750
9934
|
break;
|
|
9751
9935
|
}
|
|
9752
9936
|
pages += 1;
|
|
9753
|
-
// eslint-disable-next-line no-await-in-loop
|
|
9754
9937
|
_context.next = 2;
|
|
9755
9938
|
return Resource.create(next_page_url);
|
|
9756
9939
|
case 2:
|
|
@@ -9791,11 +9974,11 @@ function requireMercury() {
|
|
|
9791
9974
|
}
|
|
9792
9975
|
var _excluded = ["html"];
|
|
9793
9976
|
function ownKeys(e, r) {
|
|
9794
|
-
var t = _Object$
|
|
9795
|
-
if (_Object$
|
|
9796
|
-
var o = _Object$
|
|
9977
|
+
var t = _Object$keys$1(e);
|
|
9978
|
+
if (_Object$getOwnPropertySymbols$1) {
|
|
9979
|
+
var o = _Object$getOwnPropertySymbols$1(e);
|
|
9797
9980
|
r && (o = o.filter(function (r) {
|
|
9798
|
-
return _Object$
|
|
9981
|
+
return _Object$getOwnPropertyDescriptor$1(e, r).enumerable;
|
|
9799
9982
|
})), t.push.apply(t, o);
|
|
9800
9983
|
}
|
|
9801
9984
|
return t;
|
|
@@ -9803,10 +9986,10 @@ function requireMercury() {
|
|
|
9803
9986
|
function _objectSpread(e) {
|
|
9804
9987
|
for (var r = 1; r < arguments.length; r++) {
|
|
9805
9988
|
var t = null != arguments[r] ? arguments[r] : {};
|
|
9806
|
-
r % 2 ? ownKeys(Object(t),
|
|
9807
|
-
|
|
9808
|
-
}) : _Object$
|
|
9809
|
-
_Object$
|
|
9989
|
+
r % 2 ? ownKeys(Object(t), true).forEach(function (r) {
|
|
9990
|
+
_defineProperty$1(e, r, t[r]);
|
|
9991
|
+
}) : _Object$getOwnPropertyDescriptors$1 ? _Object$defineProperties$1(e, _Object$getOwnPropertyDescriptors$1(t)) : ownKeys(Object(t)).forEach(function (r) {
|
|
9992
|
+
_Object$defineProperty$1(e, r, _Object$getOwnPropertyDescriptor$1(t, r));
|
|
9810
9993
|
});
|
|
9811
9994
|
}
|
|
9812
9995
|
return e;
|
|
@@ -9814,12 +9997,12 @@ function requireMercury() {
|
|
|
9814
9997
|
var Parser = {
|
|
9815
9998
|
parse: function parse(url) {
|
|
9816
9999
|
var _arguments = arguments;
|
|
9817
|
-
return
|
|
10000
|
+
return _asyncToGenerator(/*#__PURE__*/_regeneratorRuntime.mark(function _callee() {
|
|
9818
10001
|
var _ref, html, opts, _opts$fetchAllPages, fetchAllPages, _opts$fallback, fallback, _opts$contentType, contentType, _opts$headers, headers, extend, customExtractor, parsedUrl, $, Extractor, metaCache, extendedTypes, result, _result, title, next_page_url, turndownService;
|
|
9819
|
-
return
|
|
10002
|
+
return _regeneratorRuntime.wrap(function (_context) {
|
|
9820
10003
|
while (1) switch (_context.prev = _context.next) {
|
|
9821
10004
|
case 0:
|
|
9822
|
-
_ref = _arguments.length > 1 && _arguments[1] !== undefined ? _arguments[1] : {}, html = _ref.html, opts =
|
|
10005
|
+
_ref = _arguments.length > 1 && _arguments[1] !== undefined ? _arguments[1] : {}, html = _ref.html, opts = _objectWithoutProperties(_ref, _excluded);
|
|
9823
10006
|
_opts$fetchAllPages = opts.fetchAllPages, fetchAllPages = _opts$fetchAllPages === void 0 ? true : _opts$fetchAllPages, _opts$fallback = opts.fallback, fallback = _opts$fallback === void 0 ? true : _opts$fallback, _opts$contentType = opts.contentType, contentType = _opts$contentType === void 0 ? 'html' : _opts$contentType, _opts$headers = opts.headers, headers = _opts$headers === void 0 ? {} : _opts$headers, extend = opts.extend, customExtractor = opts.customExtractor; // if no url was passed and this is the browser version,
|
|
9824
10007
|
// set url to window.location.href and load the html
|
|
9825
10008
|
// from the current page
|
|
@@ -9827,7 +10010,7 @@ function requireMercury() {
|
|
|
9827
10010
|
url = window.location.href; // eslint-disable-line no-undef
|
|
9828
10011
|
html = html || document.documentElement.outerHTML; // eslint-disable-line no-undef
|
|
9829
10012
|
}
|
|
9830
|
-
parsedUrl =
|
|
10013
|
+
parsedUrl = URL$1$1.parse(url);
|
|
9831
10014
|
if (validateUrl(parsedUrl)) {
|
|
9832
10015
|
_context.next = 1;
|
|
9833
10016
|
break;
|
|
@@ -9907,7 +10090,7 @@ function requireMercury() {
|
|
|
9907
10090
|
});
|
|
9908
10091
|
case 6:
|
|
9909
10092
|
if (contentType === 'markdown') {
|
|
9910
|
-
turndownService = new
|
|
10093
|
+
turndownService = new TurndownService();
|
|
9911
10094
|
result.content = turndownService.turndown(result.content);
|
|
9912
10095
|
} else if (contentType === 'text') {
|
|
9913
10096
|
result.content = $.text($(result.content));
|
|
@@ -9963,7 +10146,7 @@ function template(strings) {
|
|
|
9963
10146
|
}
|
|
9964
10147
|
var compiled = insertValues.apply(void 0, [strings].concat(values));
|
|
9965
10148
|
var _ref = compiled.match(bodyPattern) || [],
|
|
9966
|
-
_ref2 =
|
|
10149
|
+
_ref2 = _slicedToArray(_ref, 1),
|
|
9967
10150
|
body = _ref2[0];
|
|
9968
10151
|
var indentLevel = /^\s{0,4}(.+)$/g;
|
|
9969
10152
|
if (!body) {
|
|
@@ -9981,7 +10164,7 @@ function template(strings) {
|
|
|
9981
10164
|
|
|
9982
10165
|
var _templateObject$1;
|
|
9983
10166
|
function extractorTemplate (hostname, name) {
|
|
9984
|
-
return template(_templateObject$1 || (_templateObject$1 =
|
|
10167
|
+
return template(_templateObject$1 || (_templateObject$1 = _taggedTemplateLiteral(["\n export const ", " = {\n domain: '", "',\n\n title: {\n selectors: [\n // enter title selectors\n ],\n },\n\n author: {\n selectors: [\n // enter author selectors\n ],\n },\n\n date_published: {\n selectors: [\n // enter selectors\n ],\n },\n\n lead_image_url: {\n selectors: [\n // enter selectors\n ],\n },\n\n content: {\n selectors: [\n // enter content selectors\n ],\n\n // Is there anything in the content you selected that needs transformed\n // before it's consumable content? E.g., unusual lazy loaded images\n transforms: {\n },\n\n // Is there anything that is in the result that shouldn't be?\n // The clean selectors will remove anything that matches from\n // the result\n clean: [\n\n ]\n },\n }\n "])), name, hostname);
|
|
9985
10168
|
}
|
|
9986
10169
|
|
|
9987
10170
|
var _templateObject, _templateObject2;
|
|
@@ -9990,10 +10173,10 @@ function testFor(key, value, dir) {
|
|
|
9990
10173
|
if (IGNORE.find(function (k) {
|
|
9991
10174
|
return k === key;
|
|
9992
10175
|
})) return '';
|
|
9993
|
-
return template(_templateObject || (_templateObject =
|
|
10176
|
+
return template(_templateObject || (_templateObject = _taggedTemplateLiteral(["\n it('returns the ", "', async () => {\n // To pass this test, fill out the ", " selector\n // in ", "/index.js.\n const { ", " } = await result\n\n // Update these values with the expected values from\n // the article.\n assert.strictEqual(", ", ", ")\n });\n "])), key, key, dir, key, key, value ? "`".concat(value, "`") : "''");
|
|
9994
10177
|
}
|
|
9995
10178
|
function extractorTestTemplate (file, url, dir, result, name) {
|
|
9996
|
-
return template(_templateObject2 || (_templateObject2 =
|
|
10179
|
+
return template(_templateObject2 || (_templateObject2 = _taggedTemplateLiteral(["\n import assert from 'assert';\n import * as cheerio from 'cheerio';\n\n import Parser from 'mercury';\n import getExtractor from 'extractors/get-extractor';\n import { excerptContent } from 'utils/text';\n\n const fs = require('fs');\n\n describe('", "', () => {\n describe('initial test case', () => {\n let result;\n let url;\n beforeAll(() => {\n url =\n '", "';\n const html =\n fs.readFileSync('", "');\n result =\n Parser.parse(url, { html, fallback: false });\n });\n\n it('is selected properly', () => {\n // This test should be passing by default.\n // It sanity checks that the correct parser\n // is being selected for URLs from this domain\n const extractor = getExtractor(url);\n assert.strictEqual(extractor.domain, new URL(url).hostname)\n })\n\n ", "\n\n it('returns the content', async () => {\n // To pass this test, fill out the content selector\n // in ", "/index.js.\n // You may also want to make use of the clean and transform\n // options.\n const { content } = await result;\n\n const $ = cheerio.load(content || '');\n\n const first13 = excerptContent($('*').first().text(), 13)\n\n // Update these values with the expected values from the article.\n // Add the first 13 words of the article here\n assert.strictEqual(first13, null);\n });\n });\n });\n "])), name, url, file, _Reflect$ownKeys(result).map(function (k) {
|
|
9997
10180
|
return testFor(k, result[k], dir);
|
|
9998
10181
|
}).join('\n\n'), dir);
|
|
9999
10182
|
}
|
|
@@ -10011,11 +10194,11 @@ var questions = [{
|
|
|
10011
10194
|
}];
|
|
10012
10195
|
var spinner;
|
|
10013
10196
|
function confirm(fn, args, msg, newParser) {
|
|
10014
|
-
spinner =
|
|
10197
|
+
spinner = ora({
|
|
10015
10198
|
text: msg
|
|
10016
10199
|
});
|
|
10017
10200
|
spinner.start();
|
|
10018
|
-
var result = fn.apply(void 0,
|
|
10201
|
+
var result = fn.apply(void 0, _toConsumableArray(args));
|
|
10019
10202
|
if (result && result.then) {
|
|
10020
10203
|
result.then(function (r) {
|
|
10021
10204
|
if (r && r.error) {
|
|
@@ -10035,8 +10218,8 @@ function confirm(fn, args, msg, newParser) {
|
|
|
10035
10218
|
return result;
|
|
10036
10219
|
}
|
|
10037
10220
|
function confirmCreateDir(dir, msg) {
|
|
10038
|
-
if (!
|
|
10039
|
-
confirm(
|
|
10221
|
+
if (!fs.existsSync(dir)) {
|
|
10222
|
+
confirm(fs.mkdirSync, [dir], msg);
|
|
10040
10223
|
}
|
|
10041
10224
|
}
|
|
10042
10225
|
function getDir(url) {
|
|
@@ -10049,7 +10232,7 @@ function scaffoldCustomParser(url) {
|
|
|
10049
10232
|
var _URL3 = new URL(url),
|
|
10050
10233
|
hostname = _URL3.hostname;
|
|
10051
10234
|
var newParser = false;
|
|
10052
|
-
if (!
|
|
10235
|
+
if (!fs.existsSync(dir)) {
|
|
10053
10236
|
newParser = true;
|
|
10054
10237
|
confirmCreateDir(dir, "Creating ".concat(hostname, " directory"));
|
|
10055
10238
|
confirmCreateDir("./fixtures/".concat(hostname), 'Creating fixtures directory');
|
|
@@ -10062,7 +10245,7 @@ var urlArg = process.argv[2];
|
|
|
10062
10245
|
if (urlArg) {
|
|
10063
10246
|
scaffoldCustomParser(urlArg);
|
|
10064
10247
|
} else {
|
|
10065
|
-
|
|
10248
|
+
inquirer.prompt(questions).then(function (answers) {
|
|
10066
10249
|
scaffoldCustomParser(answers.website);
|
|
10067
10250
|
});
|
|
10068
10251
|
}
|
|
@@ -10071,13 +10254,13 @@ function generateScaffold(url, file, result) {
|
|
|
10071
10254
|
hostname = _URL4.hostname;
|
|
10072
10255
|
var extractor = extractorTemplate(hostname, extractorName(hostname));
|
|
10073
10256
|
var extractorTest = extractorTestTemplate(file, url, getDir(url), result, extractorName(hostname));
|
|
10074
|
-
|
|
10075
|
-
|
|
10076
|
-
|
|
10257
|
+
fs.writeFileSync("".concat(getDir(url), "/index.js"), extractor);
|
|
10258
|
+
fs.writeFileSync("".concat(getDir(url), "/index.test.js"), extractorTest);
|
|
10259
|
+
fs.appendFileSync('./src/extractors/custom/index.js', exportString(url));
|
|
10077
10260
|
child_process.exec("npm run lint-fix-quiet -- ".concat(getDir(url), "/*.js"));
|
|
10078
10261
|
}
|
|
10079
10262
|
function savePage($, _ref, newParser) {
|
|
10080
|
-
var _ref2 =
|
|
10263
|
+
var _ref2 = _slicedToArray(_ref, 1),
|
|
10081
10264
|
url = _ref2[0];
|
|
10082
10265
|
var _URL5 = new URL(url),
|
|
10083
10266
|
hostname = _URL5.hostname;
|
|
@@ -10094,7 +10277,7 @@ function savePage($, _ref, newParser) {
|
|
|
10094
10277
|
}
|
|
10095
10278
|
});
|
|
10096
10279
|
var html = stripJunkTags($('*').first(), $, ['script']).html();
|
|
10097
|
-
|
|
10280
|
+
fs.writeFileSync(file, html);
|
|
10098
10281
|
Parser.parse(url, {
|
|
10099
10282
|
html: html
|
|
10100
10283
|
}).then(function (result) {
|